def load_data_product(self):
    """Create, persist and look up a CTD test data product.

    Builds a ``DataProduct`` carrying the dumped temporal and spatial domains
    from ``time_series_domain()``, creates a stream definition named
    ``'parsed'`` from the ``'ctd_parsed_param_dict'`` parameter dictionary,
    creates the data product against it and activates its persistence.
    Every created resource is registered with ``self.addCleanup``.

    Returns:
        tuple: ``(data_product_id, stream_id, route, stream_def_id,
        dataset_id)`` where ``stream_id`` and ``dataset_id`` are the first
        objects found through the ``hasStream`` / ``hasDataset`` predicates.
    """
    product_index = 0

    dataset_client = DatasetManagementServiceClient()
    pubsub_client = PubsubManagementServiceClient()
    product_client = DataProductManagementServiceClient()
    registry = self.container.instance.resource_registry

    temporal, spatial = time_series_domain()
    product = DataProduct(
        name='instrument_data_product_%i' % product_index,
        description='ctd stream test',
        processing_level_code='Parsed_Canonical',
        temporal_domain=temporal.dump(),
        spatial_domain=spatial.dump())

    pdict_id = dataset_client.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)

    stream_def_id = pubsub_client.create_stream_definition(
        name='parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(pubsub_client.delete_stream_definition, stream_def_id)

    data_product_id = product_client.create_data_product(
        data_product=product, stream_definition_id=stream_def_id)
    self.addCleanup(product_client.delete_data_product, data_product_id)

    # Cleanups run in reverse registration order, so persistence is
    # suspended before the data product itself is deleted.
    product_client.activate_data_product_persistence(data_product_id)
    self.addCleanup(product_client.suspend_data_product_persistence,
                    data_product_id)

    found_streams, _ = registry.find_objects(
        subject=data_product_id, predicate='hasStream', id_only=True)
    stream_id = found_streams[0]
    route = pubsub_client.read_stream_route(stream_id)

    found_datasets, _ = registry.find_objects(
        subject=data_product_id, predicate='hasDataset', id_only=True)
    dataset_id = found_datasets[0]

    return data_product_id, stream_id, route, stream_def_id, dataset_id
def load_data_product(self):
    """Create, persist and look up a CTD test data product.

    Creates a stream definition named ``'parsed'`` from the
    ``'ctd_parsed_param_dict'`` parameter dictionary, creates a
    ``DataProduct`` against it and activates the product's persistence.
    Each created resource is registered with ``self.addCleanup``.

    Returns:
        tuple: ``(data_product_id, stream_id, route, stream_def_id,
        dataset_id)``; the stream and dataset ids are the first results of
        the ``hasStream`` and ``hasDataset`` registry lookups.
    """
    sequence_no = 0

    datasets = DatasetManagementServiceClient()
    pubsub = PubsubManagementServiceClient()
    products = DataProductManagementServiceClient()
    rr = self.container.instance.resource_registry

    new_product = DataProduct(
        name='instrument_data_product_%i' % sequence_no,
        description='ctd stream test',
        processing_level_code='Parsed_Canonical')

    param_dict_id = datasets.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    stream_def_id = pubsub.create_stream_definition(
        name='parsed', parameter_dictionary_id=param_dict_id)
    self.addCleanup(pubsub.delete_stream_definition, stream_def_id)

    data_product_id = products.create_data_product(
        data_product=new_product, stream_definition_id=stream_def_id)
    self.addCleanup(products.delete_data_product, data_product_id)

    # Registered last so it runs first: persistence is suspended before the
    # product and its stream definition are deleted.
    products.activate_data_product_persistence(data_product_id)
    self.addCleanup(products.suspend_data_product_persistence,
                    data_product_id)

    streams, _ = rr.find_objects(
        subject=data_product_id, predicate='hasStream', id_only=True)
    stream_id = streams[0]
    route = pubsub.read_stream_route(stream_id)

    dataset_matches, _ = rr.find_objects(
        subject=data_product_id, predicate='hasDataset', id_only=True)
    dataset_id = dataset_matches[0]

    return data_product_id, stream_id, route, stream_def_id, dataset_id
def publish_rdt_to_data_product(cls, data_product_id, rdt, connection_id='', connection_index=''):
    """Publish ``rdt`` as a granule on a data product's stream.

    Looks up the objects linked to ``data_product_id`` through the
    ``hasStream`` predicate, takes the first one, reads its route and
    publishes ``rdt.to_granule(...)`` through a ``StandaloneStreamPublisher``.

    Args:
        data_product_id: id of the data product whose stream is published to.
        rdt: object providing ``to_granule(connection_id=, connection_index=)``.
        connection_id: forwarded unchanged to ``rdt.to_granule``.
        connection_index: forwarded unchanged to ``rdt.to_granule``.
    """
    registry = Container.instance.resource_registry
    pubsub = PubsubManagementServiceClient()

    matches, _ = registry.find_objects(data_product_id, 'hasStream', id_only=True)
    target_stream_id = matches[0]
    target_route = pubsub.read_stream_route(target_stream_id)

    stream_publisher = StandaloneStreamPublisher(target_stream_id, target_route)
    granule = rdt.to_granule(connection_id=connection_id,
                             connection_index=connection_index)
    stream_publisher.publish(granule)
class TestTransformPrime(IonIntegrationTestCase):
    """Integration tests that push a granule through a spawned TransformPrime.

    Each test creates an input and an output stream, spawns the
    ``TransformPrime`` process on a queue subscribed to the input stream,
    attaches a ``'listener'`` subscriber to the output stream, publishes one
    L0 sample and waits for a validator callback to accept the result.
    """

    def setUp(self):
        # Start the container with the r2deploy release and create the
        # service clients used by the helpers below.
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

    # ------------------------------------------------------------------
    # Private helpers (names deliberately avoid the "test" token so test
    # collectors never pick them up).
    # ------------------------------------------------------------------

    def _create_stream_definition(self, name, pdict_id, **extra):
        # Create a stream definition, register its deletion, return its id.
        # ``extra`` is forwarded untouched (e.g. ``available_fields``).
        stream_def_id = self.pubsub_management.create_stream_definition(
            name, parameter_dictionary_id=pdict_id, **extra)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)
        return stream_def_id

    def _create_stream(self, name, stream_def_id):
        # Create a stream on the 'test' exchange point, register its
        # deletion and return its id (the route is not needed by callers).
        stream_id, _route = self.pubsub_management.create_stream(
            name, stream_definition_id=stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        return stream_id

    def _subscribe(self, exchange_name, stream_id):
        # Create and activate a subscription; cleanups are registered so
        # that deactivation runs before deletion (LIFO order).
        sub_id = self.pubsub_management.create_subscription(
            exchange_name, stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        return sub_id

    def _launch_transform(self, stream_info):
        # Shared tail of setup_transform / setup_advanced_transform: wire the
        # input stream to a TransformPrime process and the output stream to
        # the 'listener' queue. ``stream_info`` is the two-pair list returned
        # by setup_streams / setup_advanced_streams.
        queue_name = 'transform_prime'
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        config = DotDict()
        config.process.queue_name = queue_name
        config.process.routes = {(in_stream_id, out_stream_id): None}
        config.process.publish_streams = {out_stream_id: out_stream_id}

        self._subscribe(queue_name, in_stream_id)

        self.container.spawn_process(
            'transform_prime',
            'ion.processes.data.transforms.transform_prime',
            'TransformPrime',
            config)

        self._subscribe('listener', out_stream_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def _publish_l0_sample(self, in_stream_id, in_stream_def_id):
        # Publish the single L0 record both tests feed into the transform.
        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        # Assigned in this fixed order, 'time' first, as in the original.
        sample = (
            ('time', [0]),
            ('TEMPWAT_L0', [280000]),
            ('CONDWAT_L0', [100000]),
            ('PRESWAT_L0', [2789]),
            ('lat', [45]),
            ('lon', [-71]),
        )
        for field, values in sample:
            outbound_rdt[field] = values

        publisher.publish(outbound_rdt.to_granule())

    # ------------------------------------------------------------------
    # Public setup helpers (interfaces unchanged).
    # ------------------------------------------------------------------

    def setup_streams(self):
        """Create the L0 input / L1 output streams.

        Returns:
            list: ``[(in_stream_id, in_stream_def_id),
            (out_stream_id, out_stream_def_id)]``.
        """
        in_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_L0_test', id_only=True)
        out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_L1_test', id_only=True)

        in_stream_def_id = self._create_stream_definition(
            'L0 SBE37', in_pdict_id)
        out_stream_def_id = self._create_stream_definition(
            'L1 SBE37', out_pdict_id)

        in_stream_id = self._create_stream('L0 input', in_stream_def_id)
        out_stream_id = self._create_stream('L0 output', out_stream_def_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_advanced_streams(self):
        """Create input/output streams restricted by ``available_fields``.

        Both stream definitions use the ``'sbe37_LC_TEST'`` parameter
        dictionary; they differ only in the fields they expose.

        Returns:
            list: ``[(in_stream_id, in_stream_def_id),
            (out_stream_id, out_stream_def_id)]``.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_LC_TEST', id_only=True)

        in_stream_def_id = self._create_stream_definition(
            'sbe37_instrument', pdict_id,
            available_fields=[
                'time', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'lat', 'lon'
            ])
        out_stream_def_id = self._create_stream_definition(
            'sbe37_l2', pdict_id,
            available_fields=['time', 'rho', 'PRACSAL_L2'])

        in_stream_id = self._create_stream(
            'instrument stream', in_stream_def_id)
        out_stream_id = self._create_stream(
            'data product stream', out_stream_def_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def preload(self):
        """Spawn the ``IONLoader`` process with the BASE and LC_TEST scenarios."""
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'

        self.container.spawn_process(
            'preload', 'ion.processes.bootstrap.ion_loader', 'IONLoader',
            config)

    def setup_advanced_transform(self):
        """Preload, create the advanced streams and launch TransformPrime.

        Returns:
            list: ``[(in_stream_id, in_stream_def_id),
            (out_stream_id, out_stream_def_id)]``.
        """
        self.preload()
        return self._launch_transform(self.setup_advanced_streams())

    def setup_transform(self):
        """Preload, create the L0/L1 streams and launch TransformPrime.

        Returns:
            list: ``[(in_stream_id, in_stream_def_id),
            (out_stream_id, out_stream_def_id)]``.
        """
        self.preload()
        return self._launch_transform(self.setup_streams())

    def setup_validator(self, validator):
        """Start a subscriber on the ``'listener'`` queue calling ``validator``.

        The subscriber is stopped through ``self.addCleanup``.
        """
        listener = StandaloneStreamSubscriber('listener', validator)
        listener.start()
        self.addCleanup(listener.stop)

    # ------------------------------------------------------------------
    # Tests. Comments are used instead of docstrings so that test-runner
    # output keeps showing the method names.
    # ------------------------------------------------------------------

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including
        # available fields.
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]

        validation_event = Event()

        def validator(msg, route, stream_id):
            # Only a granule carrying the expected 'rho' value sets the
            # event; anything else is ignored and the wait below times out.
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if np.allclose(rdt['rho'], np.array([1001.0055034])):
                validation_event.set()

        self.setup_validator(validator)
        self._publish_l0_sample(in_stream_id, in_stream_def_id)

        self.assertTrue(validation_event.wait(2))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]

        validation_event = Event()

        # Expected L1 values, checked in this order with short-circuiting.
        expected = (
            ('TEMPWAT_L1', 18.),
            ('CONDWAT_L1', 0.5),
            ('PRESWAT_L1', 0.04536611),
        )

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if all(np.allclose(rdt[field], np.array([value]))
                   for field, value in expected):
                validation_event.set()

        self.setup_validator(validator)
        self._publish_l0_sample(in_stream_id, in_stream_def_id)

        self.assertTrue(validation_event.wait(2))
class TestTransformWorker(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Instantiate a process to represent the test process=TransformWorkerTestProcess() self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node) self.pubsub_client = PubsubManagementServiceClient(node=self.container.node) self.dataproductclient = DataProductManagementServiceClient(node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process = process) self.time_dom, self.spatial_dom = time_series_domain() self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) def push_granule(self, data_product_id): ''' Publishes and monitors that the granule arrived ''' datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True) dataset_monitor = DatasetMonitor(datasets[0]) rdt = self.ph.rdt_for_data_product(data_product_id) self.ph.fill_parsed_rdt(rdt) self.ph.publish_rdt_to_data_product(data_product_id, rdt) assert dataset_monitor.wait() dataset_monitor.stop() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.dp_list = [] self.data_process_objs = [] self._output_stream_ids = [] self.granule_verified = Event() self.worker_assigned_event_verified = Event() self.dp_created_event_verified = Event() self.heartbeat_event_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] self.start_event_listener() # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.dp_list.append(dataprocess_id) # validate the repository for data product algorithms persists the new resources NEW SA-1 # create_data_process call created one of each dpd_ids, _ = self.rrclient.find_resources(restype=OT.DataProcessDefinition, id_only=False) # there will be more than one becuase of the DPDs that reperesent the PFs in the data product above self.assertTrue(dpd_ids is not None) dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False) # only one DP becuase the PFs that are in 
the code dataproduct above are not activated yet. self.assertEquals(len(dp_ids), 1) # validate the name and version label NEW SA - 2 dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(dataprocessdef_id) self.assertEqual(dataprocessdef_obj.version_label, '1.0a') self.assertEqual(dataprocessdef_obj.name, 'add_arrays') # validate that the DPD has an attachment NEW SA - 21 attachment_ids, assoc_ids = self.rrclient.find_objects(dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True) self.assertEqual(len(attachment_ids), 1) attachment_obj = self.rrclient.read_attachment(attachment_ids[0]) log.debug('attachment: %s', attachment_obj) # validate that the data process resource has input and output data products associated # L4-CI-SA-RQ-364 and NEW SA-3 outproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True) self.assertEqual(len(outproduct_ids), 1) inproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True) self.assertEqual(len(inproduct_ids), 1) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. 
self.assertTrue(len(output_data_product_provenance) == 2) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(output_data_product_provenance[output_data_product_id[0]]['parents'][self.input_dp_id]['data_process_definition_id'] == dataprocessdef_id) # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in # the metadata of each output data product created by the data product algorithm. output_data_product_obj,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=False) self.assertTrue(output_data_product_obj[0].name != None) self.assertTrue(output_data_product_obj[0]._rev != None) # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route ) for n in range(1, 101): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) # validate that the output granule is received and the updated value is correct 
self.assertTrue(self.granule_verified.wait(self.wait_time)) # validate that the data process loaded into worker event is received (L4-CI-SA-RQ-182) self.assertTrue(self.worker_assigned_event_verified.wait(self.wait_time)) # validate that the data process create (with data product ids) event is received (NEW SA -42) self.assertTrue(self.dp_created_event_verified.wait(self.wait_time)) # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182) #this takes a while so set wait limit to large value self.assertTrue(self.heartbeat_event_verified.wait(200)) # validate that the code from the transform function can be retrieve via inspect_data_process_definition src = self.dataprocessclient.inspect_data_process_definition(dataprocessdef_id) self.assertIn( 'def add_arrays(a, b)', src) # now delete the DPD and DP then verify that the resources are retired so that information required for provenance are still available self.dataprocessclient.delete_data_process(dataprocess_id) self.dataprocessclient.delete_data_process_definition(dataprocessdef_id) in_dp_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True) self.assertTrue(in_dp_objs is not None) dpd_objs, _ = self.rrclient.find_subjects(subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True) self.assertTrue(dpd_objs is not None) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_instrumentdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. 
instDevice_obj,_ = self.rrclient.find_resources(restype=RT.InstrumentDevice, name='test_ctd_device') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345" ) instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(instDevice_id in output_data_product_provenance[self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_platformdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. 
platform_device_obj,_ = self.rrclient.find_resources(restype=RT.PlatformDevice, name='TestPlatform') if platform_device_obj: platform_device_id = platform_device_obj[0]._id else: platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345" ) platform_device_id = self.imsclient.create_platform_device(platform_device=platform_device_obj) self.damsclient.assign_data_product(input_resource_id=platform_device_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(platform_device_id in output_data_product_provenance[self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[platform_device_id]['type'] == 'PlatformDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_event_transform_worker(self): self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # test that a data process (type: data-product-in / event-out) can be defined and launched. 
# verify that event fields are correctly populated self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] # create the DPD and two DPs self.event_data_process_id = self.create_event_data_processes() # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route ) self.start_event_transform_listener() self.data_modified = Event() 
rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) self.assertTrue(self.event_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_bad_argument_map(self): self._output_stream_ids = [] # test that a data process (type: data-product-in / data-product-out) parameter mapping it validated during # data process creation and that the correct exception is raised for both input and output. self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() # Set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, 
name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' ) # create the data process with invalid argument map argument_map = {"arr1": "foo", "arr2": "bar"} output_param = "salinity" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual(ex.message, "Input data product does not contain the parameters defined in argument map") # create the data process with invalid output parameter name argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "foo" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual(ex.message, "Output data product does not contain the output parameter name provided") def create_event_data_processes(self): # two data processes using one transform and one DPD argument_map= {"a": "salinity"} # set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='validate_salinity_array', description='validate_salinity_array', function='validate_salinity_array', module="ion.processes.data.transforms.test.test_transform_worker", arguments=['a'], function_type=TransformFunctionType.TRANSFORM ) add_array_func_id, rev = 
self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, name='validate_salinity_array', description='validate_salinity_array', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array' ) # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=None, argument_map=argument_map) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) return dp1_data_process_id def create_data_process(self): # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" # set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, version_label='1.0a' ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' ) # create the data process 
dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) # add an attachment object to this DPD to test new SA-21 import msgpack attachment_content = 'foo bar' attachment_obj = IonObject( RT.Attachment, name='test_attachment', attachment_type=AttachmentType.ASCII, content_type='text/plain', content=msgpack.packb(attachment_content)) att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj) self.addCleanup(self.rrclient.delete_attachment, att_id) return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id def create_output_data_product(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject( RT.DataProduct, name='data_process1_data_product', description='output of add array func', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) dp1_func_output_dp_id = self.dataproductclient.create_data_product(dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_ids.append(stream_ids[0]) subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id]) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) 
self.granule_verified.set() validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s" , str(data_process_event.__dict__)) # if data process already created, check origin if self.dp_list: self.assertIn( data_process_event.origin, self.dp_list) # if this is a heartbeat event then 100 granules have been processed if 'data process status update.' in data_process_event.description: self.heartbeat_event_verified.set() else: # else check that this is the assign event if 'Data process assigned to transform worker' in data_process_event.description: self.worker_assigned_event_verified.set() elif 'Data process created for data product' in data_process_event.description: self.dp_created_event_verified.set() def validate_output_granule(self, msg, route, stream_id): self.assertIn( stream_id, self._output_stream_ids) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule rdt: %s', rdt) sal_val = rdt['salinity'] np.testing.assert_array_equal(sal_val, np.array([3])) def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop) def validate_transform_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. 
""" status_alert_event = args[0] np.testing.assert_array_equal(status_alert_event.origin, self.stream_id ) np.testing.assert_array_equal(status_alert_event.values, np.array([self.event_data_process_id])) log.debug("DeviceStatusAlertEvent: %s" , str(status_alert_event.__dict__)) self.event_verified.set() def start_event_transform_listener(self): es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent, callback=self.validate_transform_event) es.start() self.addCleanup(es.stop) def test_download(self): egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' egg_path = TransformWorker.download_egg(egg_url) import pkg_resources pkg_resources.working_set.add_entry(egg_path) from ion_example.add_arrays import add_arrays a = add_arrays(1,2) self.assertEquals(a,3)
class TestDataProcessManagementPrime(IonIntegrationTestCase):
    """Integration tests for data processes built with ``create_data_process2``.

    Each test creates input and output data products, starts a data process
    between them, subscribes a validating callback to the output product(s),
    publishes one single-record CTD granule to an input product and waits for
    the callback to accept the result.
    """

    # Fields shared by the derived salinity and density products; each of
    # those products adds its own derived parameter to this list.
    _COMMON_FIELDS = (
        'internal_timestamp', 'preferred_timestamp', 'time', 'port_timestamp',
        'quality_flag', 'lat', 'driver_timestamp', 'lon',
    )
    # Field list used for the CTD input products (plain and LC_TEST).
    _CTD_INPUT_FIELDS = (
        'internal_timestamp', 'temp', 'preferred_timestamp', 'time',
        'port_timestamp', 'quality_flag', 'lat', 'conductivity',
        'driver_timestamp', 'lon', 'pressure',
    )
    # Seconds to wait for a granule that is expected to arrive.
    _GRANULE_TIMEOUT = 10

    def setUp(self):
        """Start the container with the r2deploy release and create the clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dataset_management = DatasetManagementServiceClient()
        self.resource_registry = self.container.resource_registry
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        # Counter used to give every subscriber created by setup_subscriber
        # a distinct name.
        self.validators = 0

    # ------------------------------------------------------------------
    # Data product helpers
    # ------------------------------------------------------------------

    def lc_preload(self):
        """Spawn the IONLoader process for the BASE and LC_TEST scenarios."""
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'
        self.container.spawn_process('preload', 'ion.processes.bootstrap.ion_loader', 'IONLoader', config)

    def ctd_plain_input_data_product(self):
        """Create the 'ctd plain test' input product and return its id."""
        return self.make_data_product('ctd_parsed_param_dict', 'ctd plain test', list(self._CTD_INPUT_FIELDS))

    def ctd_plain_salinity(self):
        """Create the 'salinity' output product and return its id."""
        return self.make_data_product('ctd_parsed_param_dict', 'salinity', list(self._COMMON_FIELDS) + ['salinity'])

    def ctd_plain_density(self):
        """Create the 'density' output product and return its id."""
        return self.make_data_product('ctd_parsed_param_dict', 'density', list(self._COMMON_FIELDS) + ['density'])

    def ctd_instrument_data_product(self):
        """Create the 'ctd instrument' product on ctd_LC_TEST and return its id."""
        return self.make_data_product('ctd_LC_TEST', 'ctd instrument', list(self._CTD_INPUT_FIELDS))

    def make_data_product(self, pdict_name, dp_name, available_fields=None):
        """Create a stream definition and a data product that uses it.

        Both resources are registered for deletion at test cleanup.

        pdict_name       -- name of the parameter dictionary to look up
        dp_name          -- data product name; the stream definition is named
                            '<dp_name> stream_def'
        available_fields -- optional list forwarded as ``available_fields`` to
                            create_stream_definition; a missing or empty list
                            is forwarded as None

        Returns the id of the new data product.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(pdict_name, id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            '%s stream_def' % dp_name,
            parameter_dictionary_id=pdict_id,
            available_fields=available_fields or None)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()
        dp_obj = DataProduct(name=dp_name)
        dp_obj.temporal_domain = tdom.dump()
        dp_obj.spatial_domain = sdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
        return data_product_id

    def google_dt_data_product(self):
        """Create the 'visual' product on the google_dt dictionary."""
        return self.make_data_product('google_dt', 'visual')

    def ctd_derived_data_product(self):
        """Create the 'ctd derived products' product on ctd_LC_TEST."""
        return self.make_data_product('ctd_LC_TEST', 'ctd derived products')

    def get_named_stream_def(self, name):
        """Return the id of the first StreamDefinition resource called *name*."""
        stream_def_ids, _ = self.resource_registry.find_resources(name=name, restype=RT.StreamDefinition, id_only=True)
        return stream_def_ids[0]

    # ------------------------------------------------------------------
    # Publishing and subscribing
    # ------------------------------------------------------------------

    def _stream_id_for(self, data_product_id):
        """Return a stream id associated (hasStream) with the data product."""
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(stream_ids, 'Data product %s has no stream' % data_product_id)
        # pop() takes the last id returned by the registry.
        return stream_ids.pop()

    def _publish_ctd_granule(self, data_product_id, temp, conductivity, pressure):
        """Publish one single-record CTD granule on the product's stream.

        Only temp, conductivity and pressure vary between callers; every
        timestamp field is set to the current time and lat/lon to 45/-71.
        """
        stream_id = self._stream_id_for(data_product_id)
        route = self.pubsub_management.read_stream_route(stream_id)
        stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition._id)
        ntp_now = time.time() + 2208988800  # Do not use in production, this is a loose translation

        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [temp]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['time'] = [ntp_now]
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [conductivity]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [pressure]

        publisher.publish(rdt.to_granule())

    def publish_to_plain_data_product(self, data_product_id):
        """Publish a granule of plain values (temp 20.0, cond 4.2914, pres 3.068)."""
        self._publish_ctd_granule(data_product_id, temp=20.0, conductivity=4.2914, pressure=3.068)

    def publish_to_data_product(self, data_product_id):
        """Publish a granule of raw values (temp 300000, cond 4341400, pres 256.8)."""
        self._publish_ctd_granule(data_product_id, temp=300000, conductivity=4341400, pressure=256.8)

    def setup_subscriber(self, data_product_id, callback):
        """Subscribe *callback* to the data product's stream.

        Creates and activates a subscription, starts a StandaloneStreamSubscriber
        on it and registers all teardown steps. Returns the subscriber.
        """
        stream_id = self._stream_id_for(data_product_id)
        name = 'validator_%s' % self.validators

        sub_id = self.pubsub_management.create_subscription(name, stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        # Cleanups run last-in-first-out, so the subscription is deactivated
        # before it is deleted.
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        subscriber = StandaloneStreamSubscriber(name, callback=callback)
        subscriber.start()
        self.addCleanup(subscriber.stop)
        self.validators += 1
        return subscriber

    def _expect_values(self, data_product_id, expected, **assert_kwargs):
        """Subscribe a callback that compares granule fields to expected values.

        expected      -- sequence of (field name, expected scalar) pairs; each
                         field of the received granule is compared with
                         ``np.array([scalar])``
        assert_kwargs -- extra keyword arguments (e.g. ``decimal``) passed to
                         np.testing.assert_array_almost_equal

        Returns an Event that is set only after every comparison has passed.
        """
        received = Event()

        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for field, value in expected:
                np.testing.assert_array_almost_equal(rdt[field], np.array([value]), **assert_kwargs)
            received.set()

        self.setup_subscriber(data_product_id, callback=validation)
        return received

    def _assert_granule_validated(self, event):
        """Fail unless *event* is set within the standard granule timeout."""
        self.assertTrue(event.wait(self._GRANULE_TIMEOUT),
                        'No granule passed validation within %s seconds' % self._GRANULE_TIMEOUT)

    # ------------------------------------------------------------------
    # Data process helpers
    # ------------------------------------------------------------------

    def _create_transform_function(self, name, module, cls):
        """Create a TransformFunction resource, schedule its deletion, return its id."""
        tf = TransformFunction(name=name, module=module, cls=cls)
        tf_id = self.data_process_management.create_transform_function(tf)
        self.addCleanup(self.data_process_management.delete_transform_function, tf_id)
        return tf_id

    def create_density_transform_function(self):
        """Create the ctdbp L2 density transform function and return its id."""
        return self._create_transform_function(
            'ctdbp_l2_density',
            'ion.processes.data.transforms.ctdbp.ctdbp_L2_density',
            'CTDBP_DensityTransformAlgorithm')

    def create_salinity_transform_function(self):
        """Create the ctdbp L2 salinity transform function and return its id."""
        return self._create_transform_function(
            'ctdbp_l2_salinity',
            'ion.processes.data.transforms.ctdbp.ctdbp_L2_salinity',
            'CTDBP_SalinityTransformAlgorithm')

    def _create_transform_definition(self, name, module, class_name):
        """Create a TRANSFORM DataProcessDefinition, schedule its deletion, return its id."""
        dpd = DataProcessDefinition(name=name)
        dpd.data_process_type = DataProcessTypeEnum.TRANSFORM
        dpd.module = module
        dpd.class_name = class_name
        dpd_id = self.data_process_management.create_data_process_definition(dpd)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dpd_id)
        return dpd_id

    def _start_data_process(self, **kwargs):
        """Create and activate a data process; return its id.

        *kwargs* are passed unchanged to ``create_data_process2``. Cleanups
        run last-in-first-out, so at teardown the process is deactivated
        before it is deleted.
        """
        data_process_id = self.data_process_management.create_data_process2(**kwargs)
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)
        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)
        return data_process_id

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_data_process_prime(self):
        """Raw LC_TEST input must yield the expected L1/L2 values on the derived product."""
        self.lc_preload()
        instrument_data_product_id = self.ctd_instrument_data_product()
        derived_data_product_id = self.ctd_derived_data_product()

        self._start_data_process(in_data_product_ids=[instrument_data_product_id],
                                 out_data_product_ids=[derived_data_product_id])

        validated = self._expect_values(derived_data_product_id, [
            ('conductivity_L1', 42.914),
            ('temp_L1', 20.),
            ('pressure_L1', 3.068),
            ('density', 1021.7144739593881),
            ('salinity', 30.935132729668283),
        ])

        self.publish_to_data_product(instrument_data_product_id)
        self._assert_granule_validated(validated)

    def test_older_transform(self):
        """ctd_L0_all must split one input granule onto three output products."""
        input_data_product_id = self.ctd_plain_input_data_product()

        conductivity_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'conductivity_product', ['time', 'conductivity'])
        conductivity_stream_def_id = self.get_named_stream_def('conductivity_product stream_def')
        temperature_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'temperature_product', ['time', 'temp'])
        temperature_stream_def_id = self.get_named_stream_def('temperature_product stream_def')
        pressure_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'pressure_product', ['time', 'pressure'])
        pressure_stream_def_id = self.get_named_stream_def('pressure_product stream_def')

        data_process_definition_id = self._create_transform_definition(
            'ctdL0', 'ion.processes.data.transforms.ctd.ctd_L0_all', 'ctd_L0_all')

        assign = self.data_process_management.assign_stream_definition_to_data_process_definition
        assign(conductivity_stream_def_id, data_process_definition_id, binding='conductivity')
        assign(temperature_stream_def_id, data_process_definition_id, binding='temperature')
        assign(pressure_stream_def_id, data_process_definition_id, binding='pressure')

        self._start_data_process(
            data_process_definition_id=data_process_definition_id,
            in_data_product_ids=[input_data_product_id],
            out_data_product_ids=[conductivity_data_product_id, temperature_data_product_id, pressure_data_product_id])

        conductivity_validated = self._expect_values(conductivity_data_product_id, [('conductivity', 4.2914)])
        temperature_validated = self._expect_values(temperature_data_product_id, [('temp', 20.0)])
        pressure_validated = self._expect_values(pressure_data_product_id, [('pressure', 3.068)])

        self.publish_to_plain_data_product(input_data_product_id)
        self._assert_granule_validated(conductivity_validated)
        self._assert_granule_validated(temperature_validated)
        self._assert_granule_validated(pressure_validated)

    def test_actors(self):
        """A single routed density actor must produce the expected density."""
        input_data_product_id = self.ctd_plain_input_data_product()
        output_data_product_id = self.ctd_plain_density()
        actor = self.create_density_transform_function()

        config = DotDict()
        config.process.routes = {input_data_product_id: {output_data_product_id: actor}}
        config.process.params.lat = 45.
        config.process.params.lon = -71.

        self._start_data_process(in_data_product_ids=[input_data_product_id],
                                 out_data_product_ids=[output_data_product_id],
                                 configuration=config)

        # The value I use is a double, the value coming back is only a float32
        # so there's some data loss but it should be precise to the 4th digit
        validated = self._expect_values(output_data_product_id, [('density', 1021.6839775385847)], decimal=4)

        self.publish_to_plain_data_product(input_data_product_id)
        self._assert_granule_validated(validated)

    def test_multi_in_out(self):
        """Routes decide which outputs fire: input1 feeds both, input2 only density."""
        input1 = self.ctd_plain_input_data_product()
        input2 = self.make_data_product('ctd_parsed_param_dict', 'input2')

        density_dp_id = self.ctd_plain_density()
        salinity_dp_id = self.ctd_plain_salinity()

        density_actor = self.create_density_transform_function()
        salinity_actor = self.create_salinity_transform_function()

        routes = {
            input1: {
                density_dp_id: density_actor,
                salinity_dp_id: salinity_actor,
            },
            input2: {
                density_dp_id: density_actor,
            },
        }

        config = DotDict()
        config.process.routes = routes
        config.process.params.lat = 45.
        config.process.params.lon = -71.

        self._start_data_process(in_data_product_ids=[input1, input2],
                                 out_data_product_ids=[density_dp_id, salinity_dp_id],
                                 configuration=config)

        density_validated = self._expect_values(density_dp_id, [('density', 1021.6839775385847)], decimal=4)
        salinity_validated = self._expect_values(salinity_dp_id, [('salinity', 30.93513240786831)], decimal=4)

        # input1 is routed to both outputs.
        self.publish_to_plain_data_product(input1)
        self._assert_granule_validated(density_validated)
        self._assert_granule_validated(salinity_validated)
        density_validated.clear()
        salinity_validated.clear()

        # input2 is routed to density only, so no salinity granule may appear.
        self.publish_to_plain_data_product(input2)
        self._assert_granule_validated(density_validated)
        self.assertFalse(salinity_validated.wait(0.75),
                         'A salinity granule was produced for an input with no salinity route')

    def test_visual_transform(self):
        """VizTransformGoogleDT must emit a granule carrying google_dt_components."""
        input_data_product_id = self.ctd_plain_input_data_product()
        output_data_product_id = self.google_dt_data_product()

        #--------------------------------------------------------------------------------
        # Walk before we base jump
        #--------------------------------------------------------------------------------
        data_process_definition_id = self._create_transform_definition(
            'visual transform', 'ion.processes.data.transforms.viz.google_dt', 'VizTransformGoogleDT')

        self._start_data_process(data_process_definition_id=data_process_definition_id,
                                 in_data_product_ids=[input_data_product_id],
                                 out_data_product_ids=[output_data_product_id])

        validated = Event()

        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            self.assertIsNotNone(rdt['google_dt_components'])
            validated.set()

        self.setup_subscriber(output_data_product_id, callback=validation)

        self.publish_to_plain_data_product(input_data_product_id)
        self._assert_granule_validated(validated)
class TestDMEnd2End(IonIntegrationTestCase): def setUp(self): # Love the non pep-8 convention self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.process_dispatcher = ProcessDispatcherServiceClient() self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.ingestion_management = IngestionManagementServiceClient() self.data_retriever = DataRetrieverServiceClient() self.pids = [] self.event = Event() self.exchange_space_name = 'test_granules' self.exchange_point_name = 'science_data' self.i = 0 self.purge_queues() self.queue_buffer = [] self.streams = [] self.addCleanup(self.stop_all_ingestion) def purge_queues(self): xn = self.container.ex_manager.create_xn_queue('science_granule_ingestion') xn.purge() def tearDown(self): self.purge_queues() for pid in self.pids: self.container.proc_manager.terminate_process(pid) IngestionManagementIntTest.clean_subscriptions() for queue in self.queue_buffer: if isinstance(queue, ExchangeNameQueue): queue.delete() elif isinstance(queue, str): xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() #-------------------------------------------------------------------------------- # Helper/Utility methods #-------------------------------------------------------------------------------- def create_dataset(self, parameter_dict_id=''): ''' Creates a time-series dataset ''' tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() if not parameter_dict_id: parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) dataset_id = self.dataset_management.create_dataset('test_dataset_%i'%self.i, parameter_dictionary_id=parameter_dict_id, spatial_domain=sdom, temporal_domain=tdom) return dataset_id def get_datastore(self, dataset_id): ''' Gets an instance of the datastore This method is primarily used to 
defeat a bug where integration tests in multiple containers may sometimes delete a CouchDB datastore and the other containers are unaware of the new state of the datastore. ''' dataset = self.dataset_management.read_dataset(dataset_id) datastore_name = dataset.datastore_name datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA) return datastore def get_ingestion_config(self): ''' Grab the ingestion configuration from the resource registry ''' # The ingestion configuration should have been created by the bootstrap service # which is configured through r2deploy.yml ingest_configs, _ = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True) return ingest_configs[0] def launch_producer(self, stream_id=''): ''' Launch the producer ''' pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}}) self.pids.append(pid) def make_simple_dataset(self): ''' Makes a stream, a stream definition and a dataset, the essentials for most of these tests ''' pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream('ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id) dataset_id = self.create_dataset(pdict_id) self.get_datastore(dataset_id) self.i += 1 return stream_id, route, stream_def_id, dataset_id def publish_hifi(self,stream_id,stream_route,offset=0): ''' Publish deterministic data ''' pub = StandaloneStreamPublisher(stream_id, stream_route) stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_def._id rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(10) + (offset * 10) rdt['temp'] = 
np.arange(10) + (offset * 10) pub.publish(rdt.to_granule()) def publish_fake_data(self,stream_id, route): ''' Make four granules ''' for i in xrange(4): self.publish_hifi(stream_id,route,i) def start_ingestion(self, stream_id, dataset_id): ''' Starts ingestion/persistence for a given dataset ''' ingest_config_id = self.get_ingestion_config() self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) def stop_ingestion(self, stream_id): ingest_config_id = self.get_ingestion_config() self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id) def stop_all_ingestion(self): try: [self.stop_ingestion(sid) for sid in self.streams] except: pass def validate_granule_subscription(self, msg, route, stream_id): ''' Validation for granule format ''' if msg == {}: return rdt = RecordDictionaryTool.load_from_granule(msg) log.info('%s', rdt.pretty_print()) self.assertIsInstance(msg,Granule,'Message is improperly formatted. 
(%s)' % type(msg)) self.event.set() def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40): ''' Loops until there is a sufficient amount of data in the dataset ''' done = False with gevent.Timeout(40): while not done: extents = self.dataset_management.dataset_extents(dataset_id, 'time')[0] granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1) rdt = RecordDictionaryTool.load_from_granule(granule) if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size: done = True else: gevent.sleep(0.2) #-------------------------------------------------------------------------------- # Test Methods #-------------------------------------------------------------------------------- @attr('SMOKE') def test_dm_end_2_end(self): #-------------------------------------------------------------------------------- # Set up a stream and have a mock instrument (producer) send data #-------------------------------------------------------------------------------- self.event.clear() # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump())) # Add another field that supports dictionary elements. 
rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) #-------------------------------------------------------------------------------- # Start persisting the data on the stream # - Get the ingestion configuration from the resource registry # - Create the dataset # - call persist_data_stream to setup the subscription for the ingestion workers # on the stream that you specify which causes the data to be persisted #-------------------------------------------------------------------------------- ingest_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) #-------------------------------------------------------------------------------- # Now the granules are ingesting and persisted #-------------------------------------------------------------------------------- self.launch_producer(stream_id) self.wait_until_we_have_enough_granules(dataset_id,40) #-------------------------------------------------------------------------------- # Now get the data in one chunk using an RPC Call to start_retreive #-------------------------------------------------------------------------------- replay_data = self.data_retriever.retrieve(dataset_id) self.assertIsInstance(replay_data, Granule) rdt = RecordDictionaryTool.load_from_granule(replay_data) self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % 
rdt['time'][:]) self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all()) #-------------------------------------------------------------------------------- # Now to try the streamed approach #-------------------------------------------------------------------------------- replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id) log.info('Process ID: %s', process_id) replay_client = ReplayClient(process_id) #-------------------------------------------------------------------------------- # Create the listening endpoint for the the retriever to talk to #-------------------------------------------------------------------------------- xp = self.container.ex_manager.create_xp(self.exchange_point_name) subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription) self.queue_buffer.append(self.exchange_space_name) subscriber.start() subscriber.xn.bind(replay_route.routing_key, xp) self.data_retriever.start_replay_agent(self.replay_id) self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched') replay_client.start_replay() self.assertTrue(self.event.wait(10)) subscriber.stop() self.data_retriever.cancel_replay_agent(self.replay_id) #-------------------------------------------------------------------------------- # Test the slicing capabilities #-------------------------------------------------------------------------------- granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)}) rdt = RecordDictionaryTool.load_from_granule(granule) b = rdt['time'] == np.arange(5) self.assertTrue(b.all() if not isinstance(b,bool) else b) self.streams.append(stream_id) self.stop_ingestion(stream_id) def test_coverage_transform(self): ph = 
ParameterHelper(self.dataset_management, self.addCleanup) pdict_id = ph.create_parsed() stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id) self.addCleanup(self.pubsub_management.delete_stream, stream_id) ingestion_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id) self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id) publisher = StandaloneStreamPublisher(stream_id, route) rdt = ph.get_rdt(stream_def_id) ph.fill_parsed_rdt(rdt) dataset_monitor = DatasetMonitor(dataset_id) self.addCleanup(dataset_monitor.stop) publisher.publish(rdt.to_granule()) self.assertTrue(dataset_monitor.event.wait(30)) replay_granule = self.data_retriever.retrieve(dataset_id) rdt_out = RecordDictionaryTool.load_from_granule(replay_granule) np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time']) np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp']) np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'], np.array([42.914])) np.testing.assert_array_almost_equal(rdt_out['temp_L1'], np.array([20.])) np.testing.assert_array_almost_equal(rdt_out['pressure_L1'], np.array([3.068])) np.testing.assert_array_almost_equal(rdt_out['density'], np.array([1021.7144739593881])) np.testing.assert_array_almost_equal(rdt_out['salinity'], np.array([30.935132729668283])) def test_qc_events(self): ph = ParameterHelper(self.dataset_management, self.addCleanup) pdict_id = ph.create_qc_pdict() stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', 
parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id) self.addCleanup(self.pubsub_management.delete_stream, stream_id) ingestion_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) config = DotDict() self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config) self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id) publisher = StandaloneStreamPublisher(stream_id, route) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(10) rdt['temp'] = np.arange(10) * 3 verified = Event() def verification(event, *args, **kwargs): self.assertEquals(event.qc_parameter, 'temp_qc') self.assertEquals(event.temporal_value, 7) verified.set() es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True) es.start() self.addCleanup(es.stop) publisher.publish(rdt.to_granule()) self.assertTrue(verified.wait(10)) def test_lookup_values_ingest_replay(self): ph = ParameterHelper(self.dataset_management, self.addCleanup) pdict_id = ph.create_lookups() stream_def_id = self.pubsub_management.create_stream_definition('lookups', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id) self.addCleanup(self.pubsub_management.delete_stream, stream_id) ingestion_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) config = DotDict() config.process.lookup_docs = ['test1', 'test2'] 
self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config) self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id) stored_value_manager = StoredValueManager(self.container) stored_value_manager.stored_value_cas('test1',{'offset_a':10.0, 'offset_b':13.1}) publisher = StandaloneStreamPublisher(stream_id, route) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(20) rdt['temp'] = [20.0] * 20 granule = rdt.to_granule() dataset_monitor = DatasetMonitor(dataset_id) self.addCleanup(dataset_monitor.stop) publisher.publish(granule) self.assertTrue(dataset_monitor.event.wait(30)) replay_granule = self.data_retriever.retrieve(dataset_id) rdt_out = RecordDictionaryTool.load_from_granule(replay_granule) np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(20)) np.testing.assert_array_almost_equal(rdt_out['temp'], np.array([20.] * 20)) np.testing.assert_array_almost_equal(rdt_out['calibrated'], np.array([30.]*20)) np.testing.assert_array_equal(rdt_out['offset_b'], np.array([rdt_out.fill_value('offset_b')] * 20)) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(20,40) rdt['temp'] = [20.0] * 20 granule = rdt.to_granule() dataset_monitor.event.clear() stored_value_manager.stored_value_cas('test1',{'offset_a':20.0}) stored_value_manager.stored_value_cas('coefficient_document',{'offset_b':10.0}) gevent.sleep(2) publisher.publish(granule) self.assertTrue(dataset_monitor.event.wait(30)) replay_granule = self.data_retriever.retrieve(dataset_id) rdt_out = RecordDictionaryTool.load_from_granule(replay_granule) np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(40)) np.testing.assert_array_almost_equal(rdt_out['temp'], np.array([20.] * 20 + [20.] * 20)) np.testing.assert_array_equal(rdt_out['offset_b'], np.array([10.] 
* 40)) np.testing.assert_array_almost_equal(rdt_out['calibrated'], np.array([30.]*20 + [40.]*20)) np.testing.assert_array_almost_equal(rdt_out['calibrated_b'], np.array([40.] * 20 + [50.] * 20)) @unittest.skip('Doesnt work') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_replay_pause(self): # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump())) # Add another field that supports dictionary elements. rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id) replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id) dataset_id = self.create_dataset(pdict_id) scov = DatasetManagementService._get_simplex_coverage(dataset_id) bb = CoverageCraft(scov) bb.rdt['time'] = np.arange(100) bb.rdt['temp'] = np.random.random(100) + 30 bb.sync_with_granule() DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations # Set up the subscriber to verify the data subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription) xp = self.container.ex_manager.create_xp('xp1') 
self.queue_buffer.append(self.exchange_space_name) subscriber.start() subscriber.xn.bind(replay_route.routing_key, xp) # Set up the replay agent and the client wrapper # 1) Define the Replay (dataset and stream to publish on) self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream) # 2) Make a client to the interact with the process (optionall provide it a process to bind with) replay_client = ReplayClient(process_id) # 3) Start the agent (launch the process) self.data_retriever.start_replay_agent(self.replay_id) # 4) Start replaying... replay_client.start_replay() # Wait till we get some granules self.assertTrue(self.event.wait(5)) # We got granules, pause the replay, clear the queue and allow the process to finish consuming replay_client.pause_replay() gevent.sleep(1) subscriber.xn.purge() self.event.clear() # Make sure there's no remaining messages being consumed self.assertFalse(self.event.wait(1)) # Resume the replay and wait until we start getting granules again replay_client.resume_replay() self.assertTrue(self.event.wait(5)) # Stop the replay, clear the queues replay_client.stop_replay() gevent.sleep(1) subscriber.xn.purge() self.event.clear() # Make sure that it did indeed stop self.assertFalse(self.event.wait(1)) subscriber.stop() def test_retrieve_and_transform(self): # Make a simple dataset and start ingestion, pretty standard stuff. 
ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(ctd_stream_id, dataset_id) # Stream definition for the salinity data salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(10) rdt['temp'] = np.random.randn(10) * 10 + 30 rdt['conductivity'] = np.random.randn(10) * 2 + 10 rdt['pressure'] = np.random.randn(10) * 1 + 12 publisher = StandaloneStreamPublisher(ctd_stream_id, route) publisher.publish(rdt.to_granule()) rdt['time'] = np.arange(10,20) publisher.publish(rdt.to_granule()) self.wait_until_we_have_enough_granules(dataset_id, 20) granule = self.data_retriever.retrieve(dataset_id, None, None, 'ion.processes.data.transforms.ctd.ctd_L2_salinity', 'CTDL2SalinityTransformAlgorithm', kwargs=dict(params=sal_stream_def_id)) rdt = RecordDictionaryTool.load_from_granule(granule) for i in rdt['salinity']: self.assertNotEquals(i,0) self.streams.append(ctd_stream_id) self.stop_ingestion(ctd_stream_id) def test_last_granule(self): stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(stream_id, dataset_id) self.publish_hifi(stream_id,route, 0) self.publish_hifi(stream_id,route, 1) self.wait_until_we_have_enough_granules(dataset_id,20) # I just need two success = False def verifier(): replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10) rdt = RecordDictionaryTool.load_from_granule(replay_granule) comp = rdt['time'] == np.arange(10) + 10 if not isinstance(comp,bool): return comp.all() return False success = poll(verifier) self.assertTrue(success) success = False def verify_points(): replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id,5) rdt = 
RecordDictionaryTool.load_from_granule(replay_granule) comp = rdt['time'] == np.arange(15,20) if not isinstance(comp,bool): return comp.all() return False success = poll(verify_points) self.assertTrue(success) self.streams.append(stream_id) self.stop_ingestion(stream_id) def test_replay_with_parameters(self): #-------------------------------------------------------------------------------- # Create the configurations and the dataset #-------------------------------------------------------------------------------- # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump())) # Add another field that supports dictionary elements. 
rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id) config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id) dataset_monitor = DatasetMonitor(dataset_id) self.addCleanup(dataset_monitor.stop) self.publish_fake_data(stream_id, route) self.assertTrue(dataset_monitor.event.wait(30)) query = { 'start_time': 0 - 2208988800, 'end_time': 20 - 2208988800, 'stride_time' : 2, 'parameters': ['time','temp'] } retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query) rdt = RecordDictionaryTool.load_from_granule(retrieved_data) comp = np.arange(0,20,2) == rdt['time'] self.assertTrue(comp.all(),'%s' % rdt.pretty_print()) self.assertEquals(set(rdt.iterkeys()), set(['time','temp'])) extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp']) self.assertTrue(extents['time']>=20) self.assertTrue(extents['temp']>=20) self.streams.append(stream_id) self.stop_ingestion(stream_id) def test_repersist_data(self): stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(stream_id, dataset_id) self.publish_hifi(stream_id,route,0) self.publish_hifi(stream_id,route,1) self.wait_until_we_have_enough_granules(dataset_id,20) config_id = self.get_ingestion_config() 
self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id) self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id) self.publish_hifi(stream_id,route,2) self.publish_hifi(stream_id,route,3) self.wait_until_we_have_enough_granules(dataset_id,40) success = False with gevent.timeout.Timeout(5): while not success: replay_granule = self.data_retriever.retrieve(dataset_id) rdt = RecordDictionaryTool.load_from_granule(replay_granule) comp = rdt['time'] == np.arange(0,40) if not isinstance(comp,bool): success = comp.all() gevent.sleep(1) self.assertTrue(success) self.streams.append(stream_id) self.stop_ingestion(stream_id) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.') def test_correct_time(self): # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e. # the conversion factor between unix and NTP time unix_now = np.floor(time.time()) ntp_now = unix_now + 2208988800 unix_ago = unix_now - 20 ntp_ago = unix_ago + 2208988800 stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() coverage = DatasetManagementService._get_simplex_coverage(dataset_id) coverage.insert_timesteps(20) coverage.set_parameter_values('time', np.arange(ntp_ago,ntp_now)) temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id) self.assertTrue( np.abs(temporal_bounds[0] - unix_ago) < 2) self.assertTrue( np.abs(temporal_bounds[1] - unix_now) < 2) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.') def test_empty_coverage_time(self): stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() coverage = DatasetManagementService._get_coverage(dataset_id) temporal_bounds = 
self.dataset_management.dataset_temporal_bounds(dataset_id) self.assertEquals([coverage.get_parameter_context('time').fill_value] *2, temporal_bounds) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.') def test_out_of_band_retrieve(self): # Setup the environemnt stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(stream_id, dataset_id) # Fill the dataset self.publish_fake_data(stream_id, route) self.wait_until_we_have_enough_granules(dataset_id,40) # Retrieve the data granule = DataRetrieverService.retrieve_oob(dataset_id) rdt = RecordDictionaryTool.load_from_granule(granule) self.assertTrue((rdt['time'] == np.arange(40)).all()) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.') def test_retrieve_cache(self): DataRetrieverService._refresh_interval = 1 datasets = [self.make_simple_dataset() for i in xrange(10)] for stream_id, route, stream_def_id, dataset_id in datasets: coverage = DatasetManagementService._get_simplex_coverage(dataset_id) coverage.insert_timesteps(10) coverage.set_parameter_values('time', np.arange(10)) coverage.set_parameter_values('temp', np.arange(10)) # Verify cache hit and refresh dataset_ids = [i[3] for i in datasets] self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache) DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the chache cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]] # Verify that it was hit and it's now in there self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache) gevent.sleep(DataRetrieverService._refresh_interval + 0.2) DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the chache cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]] self.assertTrue(age2 != age) for dataset_id in dataset_ids: 
DataRetrieverService._get_coverage(dataset_id) self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache) stream_id, route, stream_def, dataset_id = datasets[0] self.start_ingestion(stream_id, dataset_id) DataRetrieverService._get_coverage(dataset_id) self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache) DataRetrieverService._refresh_interval = 100 self.publish_hifi(stream_id,route,1) self.wait_until_we_have_enough_granules(dataset_id, data_size=20) event = gevent.event.Event() with gevent.Timeout(20): while not event.wait(0.1): if dataset_id not in DataRetrieverService._retrieve_cache: event.set() self.assertTrue(event.is_set()) def publish_and_wait(self, dataset_id, granule): stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream,id_only=True) stream_id=stream_ids[0] route = self.pubsub_management.read_stream_route(stream_id) publisher = StandaloneStreamPublisher(stream_id,route) dataset_monitor = DatasetMonitor(dataset_id) publisher.publish(granule) self.assertTrue(dataset_monitor.event.wait(10)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.') def test_thorough_gap_analysis(self): dataset_id = self.test_ingestion_gap_analysis() vcov = DatasetManagementService._get_coverage(dataset_id) self.assertIsInstance(vcov,ViewCoverage) ccov = vcov.reference_coverage self.assertIsInstance(ccov, ComplexCoverage) self.assertEquals(len(ccov._reference_covs), 3) def test_ingestion_gap_analysis(self): stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(stream_id, dataset_id) self.addCleanup(self.stop_ingestion, stream_id) connection1 = uuid4().hex connection2 = uuid4().hex rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = [0] rdt['temp'] = [0] self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='0')) rdt['time'] = 
[1] rdt['temp'] = [1] self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index=1)) rdt['time'] = [2] rdt['temp'] = [2] self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='3')) # Gap, missed message rdt['time'] = [3] rdt['temp'] = [3] self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='3')) # Gap, new connection rdt['time'] = [4] rdt['temp'] = [4] self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='4')) rdt['time'] = [5] rdt['temp'] = [5] self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index=5)) granule = self.data_retriever.retrieve(dataset_id) rdt = RecordDictionaryTool.load_from_granule(granule) np.testing.assert_array_equal(rdt['time'], np.arange(6)) np.testing.assert_array_equal(rdt['temp'], np.arange(6)) return dataset_id @unittest.skip('Outdated due to ingestion retry') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.') def test_ingestion_failover(self): stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(stream_id, dataset_id) event = Event() def cb(*args, **kwargs): event.set() sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception") sub.start() self.publish_fake_data(stream_id, route) self.wait_until_we_have_enough_granules(dataset_id, 40) file_path = DatasetManagementService._get_coverage_path(dataset_id) master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id) with open(master_file, 'w') as f: f.write('this will crash HDF') self.publish_hifi(stream_id, route, 5) self.assertTrue(event.wait(10)) sub.stop() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.') def 
test_coverage_types(self): # Make a simple dataset and start ingestion, pretty standard stuff. ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() cov = DatasetManagementService._get_coverage(dataset_id=dataset_id) self.assertIsInstance(cov, ViewCoverage) cov = DatasetManagementService._get_simplex_coverage(dataset_id=dataset_id) self.assertIsInstance(cov, SimplexCoverage)
class TestTransformPrime(IonIntegrationTestCase):
    """Integration tests that push one granule through a spawned TransformPrime process.

    Each test creates an input and an output stream, spawns the
    ``TransformPrime`` process between them, attaches a validating subscriber
    to the ``'listener'`` queue and publishes a single sample granule.
    """

    def setUp(self):
        """Start the container, deploy ``r2deploy.yml`` and create the service clients."""
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Because hey why not?!

        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

    def setup_streams(self):
        """Create the ``sbe37_L0_test`` input and ``sbe37_L1_test`` output streams.

        Returns ``[(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]``.
        Every created resource gets a matching delete registered as a cleanup.
        """
        in_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_L0_test', id_only=True)
        out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_L1_test', id_only=True)

        in_stream_def_id = self.pubsub_management.create_stream_definition('L0 SBE37', parameter_dictionary_id=in_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id)
        out_stream_def_id = self.pubsub_management.create_stream_definition('L1 SBE37', parameter_dictionary_id=out_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream('L0 input', stream_definition_id=in_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)
        out_stream_id, out_route = self.pubsub_management.create_stream('L0 output', stream_definition_id=out_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]

    def setup_advanced_streams(self):
        """Create input/output streams on ``sbe37_LC_TEST`` restricted by available fields.

        Both stream definitions share one parameter dictionary; the input
        exposes the L0 fields plus ``lat``/``lon``, the output exposes
        ``time``, ``rho`` and ``PRACSAL_L2``.

        Returns ``[(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]``.
        """
        in_pdict_id = out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_LC_TEST', id_only=True)
        in_stream_def_id = self.pubsub_management.create_stream_definition('sbe37_instrument', parameter_dictionary_id=in_pdict_id, available_fields=['time', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'lat', 'lon'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id)

        out_stream_def_id = self.pubsub_management.create_stream_definition('sbe37_l2', parameter_dictionary_id=out_pdict_id, available_fields=['time', 'rho','PRACSAL_L2'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream('instrument stream', stream_definition_id=in_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)

        out_stream_id, out_route = self.pubsub_management.create_stream('data product stream', stream_definition_id=out_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]

    def preload(self):
        """Spawn the ``IONLoader`` process for the ``BASE,LC_TEST`` scenario.

        Only the parameter functions, definitions and dictionaries categories
        are requested.
        """
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'

        self.container.spawn_process('preload','ion.processes.bootstrap.ion_loader','IONLoader', config)

    def _launch_transform(self, stream_info):
        """Spawn TransformPrime between the two streams described by ``stream_info``.

        ``stream_info`` is the two-element list returned by ``setup_streams`` /
        ``setup_advanced_streams``.  Subscribes the ``'transform_prime'`` queue
        to the input stream and the ``'listener'`` queue to the output stream,
        and returns the same pairs of (stream id, stream definition id).
        """
        queue_name = 'transform_prime'

        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        config = DotDict()
        config.process.queue_name = queue_name
        config.process.routes = {(in_stream_id, out_stream_id): None}
        config.process.publish_streams = {out_stream_id: out_stream_id}

        # Cleanups run in reverse registration order, so each subscription is
        # deactivated before it is deleted.
        sub_id = self.pubsub_management.create_subscription(queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process('transform_prime', 'ion.processes.data.transforms.transform_prime','TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription('listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, listen_sub_id)
        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]

    def setup_advanced_transform(self):
        """Preload parameters, create the available-fields streams and spawn the transform.

        Returns ``[(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]``.
        """
        self.preload()
        return self._launch_transform(self.setup_advanced_streams())

    def setup_transform(self):
        """Preload parameters, create the L0/L1 streams and spawn the transform.

        Returns ``[(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]``.
        """
        self.preload()
        return self._launch_transform(self.setup_streams())

    def setup_validator(self, validator):
        """Start a subscriber on the ``'listener'`` queue that calls ``validator``.

        The subscriber is stopped automatically at test cleanup.
        """
        listener = StandaloneStreamSubscriber('listener', validator)
        listener.start()
        self.addCleanup(listener.stop)

    def _publish_l0_sample(self, in_stream_id, in_stream_def_id):
        """Publish the single-record L0 sample granule on the input stream."""
        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_execute_advanced_transform(self):
        """Expect ``rho`` close to 1001.0055034 on the output stream within 2 seconds."""
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]

        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            # A non-matching granule is ignored; the test then fails by timeout.
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        self._publish_l0_sample(in_stream_id, in_stream_def_id)

        self.assertTrue(validation_event.wait(2))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_execute_transform(self):
        """Expect the three ``*_L1`` values on the output stream within 2 seconds."""
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]

        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            # All three derived values must match before the event is set.
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        self._publish_l0_sample(in_stream_id, in_stream_def_id)

        self.assertTrue(validation_event.wait(2))

class PubsubManagementIntTest(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.data_product_management = DataProductManagementServiceClient() self.pdicts = {} self.queue_cleanup = list() self.exchange_cleanup = list() self.context_ids = set() def tearDown(self): for queue in self.queue_cleanup: xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() for exchange in self.exchange_cleanup: xp = self.container.ex_manager.create_xp(exchange) xp.delete() self.cleanup_contexts() def test_stream_def_crud(self): # Test Creation pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict') stream_definition_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict.identifier) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_definition_id) # Make sure there is an assoc self.assertTrue(self.resource_registry.find_associations(subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True)) # Test Reading stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id) self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary)) # Test comparisons in_stream_definition_id = self.pubsub_management.create_stream_definition('L0 products', parameter_dictionary_id=pdict.identifier, available_fields=['time','temp','conductivity','pressure']) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id) out_stream_definition_id = in_stream_definition_id self.assertTrue(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id)) 
self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id)) out_stream_definition_id = self.pubsub_management.create_stream_definition('L2 Products', parameter_dictionary_id=pdict.identifier, available_fields=['time','salinity','density']) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id) self.assertFalse(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id)) self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id)) @unittest.skip('Needs to be refactored for cleanup') def test_validate_stream_defs(self): self.addCleanup(self.cleanup_contexts) #test no input incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [] available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_0', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_0', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test input with no output incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] 
available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_1', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_1', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test available field missing parameter context definition -- missing PRESWAT_L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_2', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_2', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test l1 from l0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = 
['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_3', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_3', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test l2 from l0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_4', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_4', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test Ln from L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = 
['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_5', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_5', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test L2 from L1 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_6', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_6', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test L1 from L0 missing L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = 
self.pubsub_management.create_stream_definition('in_sd_7', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_7', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test L2 from L0 missing L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_8', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_8', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test L2 from L0 missing L1 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_9', parameter_dictionary_id=incoming_pdict_id, 
available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_9', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) def publish_on_stream(self, stream_id, msg): stream = self.pubsub_management.read_stream(stream_id) stream_route = stream.stream_route publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route) publisher.publish(msg) def test_stream_crud(self): stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream') self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_exchange') self.addCleanup(self.pubsub_management.delete_topic, topic_id) self.exchange_cleanup.append('test_exchange') topic2_id = self.pubsub_management.create_topic(name='another_topic', exchange_point='outside') self.addCleanup(self.pubsub_management.delete_topic, topic2_id) stream_id, route = self.pubsub_management.create_stream(name='test_stream', topic_ids=[topic_id, topic2_id], exchange_point='test_exchange', stream_definition_id=stream_def_id) topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertEquals(topics,[topic_id]) defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True) self.assertTrue(len(defs)) stream = self.pubsub_management.read_stream(stream_id) self.assertEquals(stream.name,'test_stream') self.pubsub_management.delete_stream(stream_id) with self.assertRaises(NotFound): 
self.pubsub_management.read_stream(stream_id) defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True) self.assertFalse(len(defs)) topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertFalse(len(topics)) def test_data_product_subscription(self): pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) tdom, sdom = time_series_domain() dp = DataProduct(name='ctd parsed') dp.spatial_domain = sdom.dump() dp.temporal_domain = tdom.dump() data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id) self.addCleanup(self.data_product_management.delete_data_product, data_product_id) subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id]) self.addCleanup(self.pubsub_management.delete_subscription, subscription_id) validated = Event() def validation(msg, route, stream_id): validated.set() stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True) dp_stream_id = stream_ids.pop() validator = StandaloneStreamSubscriber('validator', callback=validation) validator.start() self.addCleanup(validator.stop) self.pubsub_management.activate_subscription(subscription_id) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id) route = self.pubsub_management.read_stream_route(dp_stream_id) publisher = StandaloneStreamPublisher(dp_stream_id, route) publisher.publish('hi') self.assertTrue(validated.wait(10)) def test_subscription_crud(self): stream_def_id = self.pubsub_management.create_stream_definition('test_definition', 
stream_type='stream') stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_exchange', stream_definition_id=stream_def_id) subscription_id = self.pubsub_management.create_subscription(name='test subscription', stream_ids=[stream_id], exchange_name='test_queue') self.exchange_cleanup.append('test_exchange') subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True) self.assertEquals(subs,[stream_id]) res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='test_queue', id_only=True) self.assertEquals(len(res),1) subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(subs[0], res[0]) subscription = self.pubsub_management.read_subscription(subscription_id) self.assertEquals(subscription.exchange_name, 'test_queue') self.pubsub_management.delete_subscription(subscription_id) subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True) self.assertFalse(len(subs)) subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertFalse(len(subs)) self.pubsub_management.delete_stream(stream_id) self.pubsub_management.delete_stream_definition(stream_def_id) def test_move_before_activate(self): stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp') #-------------------------------------------------------------------------------- # Test moving before activate #-------------------------------------------------------------------------------- subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id]) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True) subjects, _ = 
self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(xn_ids[0], subjects[0]) self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue') xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(len(subjects),1) self.assertEquals(subjects[0], xn_ids[0]) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_move_activated_subscription(self): stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp') #-------------------------------------------------------------------------------- # Test moving after activate #-------------------------------------------------------------------------------- subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id]) self.pubsub_management.activate_subscription(subscription_id) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(xn_ids[0], subjects[0]) self.verified = Event() def verify(m,r,s): self.assertEquals(m,'verified') self.verified.set() subscriber = StandaloneStreamSubscriber('second_queue', verify) subscriber.start() self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue') xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(len(subjects),1) self.assertEquals(subjects[0], xn_ids[0]) 
publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('verified') self.assertTrue(self.verified.wait(2)) self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_queue_cleanup(self): stream_id, route = self.pubsub_management.create_stream('test_stream','xp1') xn_objs, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') for xn_obj in xn_objs: xn = self.container.ex_manager.create_xn_queue(xn_obj.name) xn.delete() subscription_id = self.pubsub_management.create_subscription('queue1',stream_ids=[stream_id]) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') self.assertEquals(len(xn_ids),1) self.pubsub_management.delete_subscription(subscription_id) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') self.assertEquals(len(xn_ids),0) def test_activation_and_deactivation(self): stream_id, route = self.pubsub_management.create_stream('stream1','xp1') subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id]) self.check1 = Event() def verifier(m,r,s): self.check1.set() subscriber = StandaloneStreamSubscriber('sub1',verifier) subscriber.start() publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.25)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) self.check1.clear() self.assertFalse(self.check1.is_set()) self.pubsub_management.deactivate_subscription(subscription_id) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.5)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) subscriber.stop() 
self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_topic_crud(self): topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_xp') self.exchange_cleanup.append('test_xp') topic = self.pubsub_management.read_topic(topic_id) self.assertEquals(topic.name,'test_topic') self.assertEquals(topic.exchange_point, 'test_xp') self.pubsub_management.delete_topic(topic_id) with self.assertRaises(NotFound): self.pubsub_management.read_topic(topic_id) def test_full_pubsub(self): self.sub1_sat = Event() self.sub2_sat = Event() def subscriber1(m,r,s): self.sub1_sat.set() def subscriber2(m,r,s): self.sub2_sat.set() sub1 = StandaloneStreamSubscriber('sub1', subscriber1) sub1.start() self.addCleanup(sub1.stop) sub2 = StandaloneStreamSubscriber('sub2', subscriber2) sub2.start() self.addCleanup(sub2.stop) log_topic = self.pubsub_management.create_topic('instrument_logs', exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_topic, log_topic) science_topic = self.pubsub_management.create_topic('science_data', exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_topic, science_topic) events_topic = self.pubsub_management.create_topic('notifications', exchange_point='events') self.addCleanup(self.pubsub_management.delete_topic, events_topic) log_stream, route = self.pubsub_management.create_stream('instrument1-logs', topic_ids=[log_topic], exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_stream, log_stream) ctd_stream, route = self.pubsub_management.create_stream('instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_stream, ctd_stream) event_stream, route = self.pubsub_management.create_stream('notifications', topic_ids=[events_topic], exchange_point='events') 
self.addCleanup(self.pubsub_management.delete_stream, event_stream) raw_stream, route = self.pubsub_management.create_stream('temp', exchange_point='global.data') self.addCleanup(self.pubsub_management.delete_stream, raw_stream) subscription1 = self.pubsub_management.create_subscription('subscription1', stream_ids=[log_stream,event_stream], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription1) subscription2 = self.pubsub_management.create_subscription('subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2') self.addCleanup(self.pubsub_management.delete_subscription, subscription2) self.pubsub_management.activate_subscription(subscription1) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription1) self.pubsub_management.activate_subscription(subscription2) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription2) self.publish_on_stream(log_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) self.assertFalse(self.sub2_sat.is_set()) self.publish_on_stream(raw_stream,1) self.assertTrue(self.sub1_sat.wait(4)) def test_topic_craziness(self): self.msg_queue = Queue() def subscriber1(m,r,s): self.msg_queue.put(m) sub1 = StandaloneStreamSubscriber('sub1', subscriber1) sub1.start() self.addCleanup(sub1.stop) topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1') self.addCleanup(self.pubsub_management.delete_topic, topic1) topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1) self.addCleanup(self.pubsub_management.delete_topic, topic2) topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1) self.addCleanup(self.pubsub_management.delete_topic, topic3) topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2) self.addCleanup(self.pubsub_management.delete_topic, topic4) topic5 = 
self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2) self.addCleanup(self.pubsub_management.delete_topic, topic5) topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3) self.addCleanup(self.pubsub_management.delete_topic, topic6) topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3) self.addCleanup(self.pubsub_management.delete_topic, topic7) # Tree 2 topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2') self.addCleanup(self.pubsub_management.delete_topic, topic8) topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8) self.addCleanup(self.pubsub_management.delete_topic, topic9) topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9) self.addCleanup(self.pubsub_management.delete_topic, topic10) topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9) self.addCleanup(self.pubsub_management.delete_topic, topic11) topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11) self.addCleanup(self.pubsub_management.delete_topic, topic12) topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11) self.addCleanup(self.pubsub_management.delete_topic, topic13) self.exchange_cleanup.extend(['xp1','xp2']) stream1_id, route = self.pubsub_management.create_stream('stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1') self.addCleanup(self.pubsub_management.delete_stream, stream1_id) stream2_id, route = self.pubsub_management.create_stream('stream2', topic_ids=[topic8], exchange_point='xp2') self.addCleanup(self.pubsub_management.delete_stream, stream2_id) stream3_id, route = self.pubsub_management.create_stream('stream3', topic_ids=[topic10,topic13], exchange_point='xp2') 
self.addCleanup(self.pubsub_management.delete_stream, stream3_id) stream4_id, route = self.pubsub_management.create_stream('stream4', topic_ids=[topic9], exchange_point='xp2') self.addCleanup(self.pubsub_management.delete_stream, stream4_id) stream5_id, route = self.pubsub_management.create_stream('stream5', topic_ids=[topic11], exchange_point='xp2') self.addCleanup(self.pubsub_management.delete_stream, stream5_id) subscription1 = self.pubsub_management.create_subscription('sub1', topic_ids=[topic1]) self.addCleanup(self.pubsub_management.delete_subscription, subscription1) subscription2 = self.pubsub_management.create_subscription('sub2', topic_ids=[topic8], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription2) subscription3 = self.pubsub_management.create_subscription('sub3', topic_ids=[topic9], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription3) subscription4 = self.pubsub_management.create_subscription('sub4', topic_ids=[topic10,topic13, topic11], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription4) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription1) self.publish_on_stream(stream1_id,1) self.assertEquals(self.msg_queue.get(timeout=10), 1) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription1) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(stream2_id,2) self.assertEquals(self.msg_queue.get(timeout=10), 2) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription2) #-------------------------------------------------------------------------------- 
self.pubsub_management.activate_subscription(subscription3) self.publish_on_stream(stream2_id, 3) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream3_id, 4) self.assertEquals(self.msg_queue.get(timeout=10),4) self.pubsub_management.deactivate_subscription(subscription3) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription4) self.publish_on_stream(stream4_id, 5) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream5_id, 6) self.assertEquals(self.msg_queue.get(timeout=10),6) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.pubsub_management.deactivate_subscription(subscription4) #-------------------------------------------------------------------------------- def cleanup_contexts(self): for context_id in self.context_ids: self.dataset_management.delete_parameter_context(context_id) def add_context_to_cleanup(self, context_id): self.context_ids.add(context_id) def _get_pdict(self, filter_values): t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64'))) t_ctxt.uom = 'seconds since 01-01-1900' t_ctxt_id = self.dataset_management.create_parameter_context(name='TIME', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', units=t_ctxt.uom) self.add_context_to_cleanup(t_ctxt_id) lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999) lat_ctxt.axis = AxisTypeEnum.LAT lat_ctxt.uom = 'degree_north' lat_ctxt_id = self.dataset_management.create_parameter_context(name='LAT', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', units=lat_ctxt.uom) self.add_context_to_cleanup(lat_ctxt_id) lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999) lon_ctxt.axis = AxisTypeEnum.LON lon_ctxt.uom = 
'degree_east' lon_ctxt_id = self.dataset_management.create_parameter_context(name='LON', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', units=lon_ctxt.uom) self.add_context_to_cleanup(lon_ctxt_id) # Independent Parameters # Temperature - values expected to be the decimal results of conversion from hex temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) temp_ctxt.uom = 'deg_C' temp_ctxt_id = self.dataset_management.create_parameter_context(name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', units=temp_ctxt.uom) self.add_context_to_cleanup(temp_ctxt_id) # Conductivity - values expected to be the decimal results of conversion from hex cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) cond_ctxt.uom = 'S m-1' cond_ctxt_id = self.dataset_management.create_parameter_context(name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', units=cond_ctxt.uom) self.add_context_to_cleanup(cond_ctxt_id) # Pressure - values expected to be the decimal results of conversion from hex press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) press_ctxt.uom = 'dbar' press_ctxt_id = self.dataset_management.create_parameter_context(name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', units=press_ctxt.uom) self.add_context_to_cleanup(press_ctxt_id) # Dependent Parameters # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10 tl1_func = '(T / 10000) - 10' tl1_pmap = {'T': 'TEMPWAT_L0'} expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'], param_map=tl1_pmap) tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) tempL1_ctxt.uom = 'deg_C' tempL1_ctxt_id = 
self.dataset_management.create_parameter_context(name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', units=tempL1_ctxt.uom) self.add_context_to_cleanup(tempL1_ctxt_id) # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5 cl1_func = '(C / 100000) - 0.5' cl1_pmap = {'C': 'CONDWAT_L0'} expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'], param_map=cl1_pmap) condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) condL1_ctxt.uom = 'S m-1' condL1_ctxt_id = self.dataset_management.create_parameter_context(name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', units=condL1_ctxt.uom) self.add_context_to_cleanup(condL1_ctxt_id) # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721 # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range) pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)' pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721} expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'], param_map=pl1_pmap) presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) presL1_ctxt.uom = 'S m-1' presL1_ctxt_id = self.dataset_management.create_parameter_context(name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', units=presL1_ctxt.uom) self.add_context_to_cleanup(presL1_ctxt_id) # Density & practical salinity calucluated using the Gibbs Seawater library - available via python-gsw project: # https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1 # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1) owner = 'gsw' sal_func = 'SP_from_C' sal_arglist = ['C', 't', 'p'] sal_pmap = {'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'} sal_kwargmap = None expr = 
PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap, sal_pmap) sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL) sal_ctxt.uom = 'g kg-1' sal_ctxt_id = self.dataset_management.create_parameter_context(name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', units=sal_ctxt.uom) self.add_context_to_cleanup(sal_ctxt_id) # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude) # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1) # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1) owner = 'gsw' abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT']) cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1']) dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1']) dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL) dens_ctxt.uom = 'kg m-3' dens_ctxt_id = self.dataset_management.create_parameter_context(name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', units=dens_ctxt.uom) self.add_context_to_cleanup(dens_ctxt_id) ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id, sal_ctxt_id, dens_ctxt_id] contexts = [t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt, tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt] context_ids = [ids[i] for i,ctxt in enumerate(contexts) if ctxt.name in filter_values] pdict_name = '_'.join([ctxt.name for ctxt in contexts if ctxt.name in filter_values]) try: self.pdicts[pdict_name] return self.pdicts[pdict_name] except KeyError: pdict_id = self.dataset_management.create_parameter_dictionary(pdict_name, 
parameter_context_ids=context_ids, temporal_context='time') self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id) self.pdicts[pdict_name] = pdict_id return pdict_id
class TestTransformWorker(IonIntegrationTestCase):
    """Integration test: one transform worker feeding two data processes.

    A granule is published on an input stream; two data processes built from
    the same 'add_arrays' definition each write conductivity + pressure into
    salinity, and a validator subscriber checks the output granules.
    """

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create service clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_transform_worker(self):
        """Publish one granule through the transform worker and let it be validated."""
        self.loggerpids = []
        self.data_process_objs = []
        self._output_stream_ids = []

        self.start_transform_worker()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(
            RT.DataProduct,
            name='input_data_product',
            description='input test stream',
            temporal_domain=self.time_dom.dump(),
            spatial_domain=self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, _ = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route)

        self.start_event_listener()

        self.dp_list = self.create_data_processes()

        # NOTE(review): nothing in this class sets `data_modified`, so each
        # wait(5) below acts as a delay of up to five seconds, and the test
        # never asserts that output validation actually ran.
        self.data_modified = Event()
        self.data_modified.wait(5)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher.publish(rdt.to_granule())

        self.data_modified.wait(5)

        # Cleanup processes
        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

    def create_data_processes(self):
        """Create two data processes that share one transform function and definition.

        :returns: list with the two data process ids
        """
        # two data processes using one transform and one DPD
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
        configuration = {
            'argument_map': {'arr1': 'conductivity', 'arr2': 'pressure'},
            'output_param': 'salinity'
        }

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM)
        add_array_func_id, _ = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS)
        self.add_array_dpd_id = self.dataprocessclient.create_data_process_definition_new(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, self.add_array_dpd_id, binding='add_array_func')

        # Create the data processes; both read the same input data product.
        dp1_data_process_id = self.dataprocessclient.create_data_process_new(
            data_process_definition_id=self.add_array_dpd_id,
            in_data_product_ids=[self.input_dp_id],
            out_data_product_ids=[dp1_func_output_dp_id],
            configuration=configuration)

        dp2_func_data_process_id = self.dataprocessclient.create_data_process_new(
            data_process_definition_id=self.add_array_dpd_id,
            in_data_product_ids=[self.input_dp_id],
            out_data_product_ids=[dp2_func_output_dp_id],
            configuration=configuration)

        return [dp1_data_process_id, dp2_func_data_process_id]

    def _create_output_data_product(self, stream_def_name, data_product_name):
        """Create one output data product and remember its stream id.

        Registers deletion of the data product as a cleanup and appends the
        product's stream id to ``self._output_stream_ids``.

        :returns: the new data product id
        """
        stream_def_id = self.pubsub_client.create_stream_definition(name=stream_def_name, parameter_dictionary_id=self.parameter_dict_id)

        dp_obj = IonObject(
            RT.DataProduct,
            name=data_product_name,
            description='output of add array func',
            temporal_domain=self.time_dom.dump(),
            spatial_domain=self.spatial_dom.dump())

        dp_id = self.dataproductclient.create_data_product(dp_obj, stream_def_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp_id)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])
        return dp_id

    def create_output_data_products(self):
        """Create both output data products and start a validator on them.

        :returns: tuple (dp1 output data product id, dp2 output data product id)
        """
        dp1_func_output_dp_id = self._create_output_data_product('dp1_stream', 'data_process1_data_product')
        dp2_func_output_dp_id = self._create_output_data_product('dp2_stream', 'data_process2_data_product')

        subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s', stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)

        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)

        return dp1_func_output_dp_id, dp2_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__))
        self.assertIn(data_process_event.origin, self.dp_list)

    def validate_output_granule(self, msg, route, stream_id):
        """Check an output granule: known stream and salinity == [3] (1 + 2)."""
        self.assertIn(stream_id, self._output_stream_ids)

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  rdt: %s', rdt)
        sal_val = rdt['salinity']
        np.testing.assert_array_equal(sal_val, np.array([3]))

    def start_event_listener(self):
        """Subscribe ``validate_event`` to DataProcessStatusEvent; stopped on cleanup."""
        es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)

    def start_transform_worker(self):
        """Spawn a TransformWorker process on the 'parsed_subscription' queue."""
        config = DotDict()
        config.process.queue_name = 'parsed_subscription'

        self.container.spawn_process(
            name='transform_worker',
            module='ion.processes.data.transforms.transform_worker',
            cls='TransformWorker',
            config=config
        )

    def test_download(self):
        """Download the example egg, add it to the working set and call add_arrays."""
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays

        a = add_arrays(1, 2)
        self.assertEqual(a, 3)
class TestGranulePublish(IonIntegrationTestCase):
    """Integration test that publishes one CTD sample as a granule."""

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create service clients."""
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()

    def create_logger(self, name, stream_id=''):
        """Schedule a StreamGranuleLogger process for ``stream_id``.

        :param name: prefix for the process definition name ('<name>_logger')
        :param stream_id: stream the logger is configured with
        :returns: pid returned by ``schedule_process``
        """
        # logger process
        producer_definition = ProcessDefinition(name=name + '_logger')
        producer_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger'
        }

        logger_procdef_id = self.processdispatchclient.create_process_definition(process_definition=producer_definition)
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        pid = self.processdispatchclient.schedule_process(process_definition_id=logger_procdef_id, configuration=configuration)

        return pid

    def _delete_all_data_products(self):
        """Delete every DataProduct resource currently in the resource registry."""
        dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True)
        for dp_id in dp_ids:
            self.dataproductclient.delete_data_product(dp_id)

    #overriding trigger function here to use new granule
    def test_granule_publish(self):
        """Build a granule from one driver sample and publish it on a new stream.

        The logger process and the data products are released through
        ``addCleanup`` so they are removed even when the test body fails.
        """
        log.debug("test_granule_publish ")
        self.loggerpids = []

        # retrieve the param dict from the repository
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()

        dp_obj = IonObject(
            RT.DataProduct,
            name=str(uuid.uuid4()),
            description='ctd stream test',
            temporal_domain=tdom.dump(),
            spatial_domain=sdom.dump())

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)
        # Cleanups run last-in first-out: the logger registered below is
        # cancelled before the data products are deleted.
        self.addCleanup(self._delete_all_data_products)

        # Retrieve the id of the output stream of the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug('test_granule_publish: Data product streams1 = %s', stream_ids)

        pid = self.create_logger('ctd_parsed', stream_ids[0])
        self.loggerpids.append(pid)
        self.addCleanup(self.processdispatchclient.cancel_process, pid)

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        # create the publisher from the stream route
        stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
        publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

        # this is one sample from the ctd driver
        tomato = {
            "driver_timestamp": 3555971105.1268806,
            "instrument_id": "ABC-123",
            "pkt_format_id": "JSON_Data",
            "pkt_version": 1,
            "preferred_timestamp": "driver_timestamp",
            "quality_flag": "ok",
            "stream_name": "parsed",
            "values": [{
                "value": 22.9304,
                "value_id": "temp"
            }, {
                "value": 51.57381,
                "value_id": "conductivity"
            }, {
                "value": 915.551,
                "value_id": "pressure"
            }]
        }

        for value in tomato['values']:
            # value_id is the key and value is the val; the arguments were
            # previously passed in the opposite order to the message labels.
            log.debug("test_granule_publish: Looping tomato values  key: %s    val: %s ", value['value_id'], value['value'])

            if value['value_id'] in rdt:
                rdt[value['value_id']] = numpy.array([value['value']])
                log.debug("test_granule_publish: Added data item  %s  val: %s ", value['value_id'], value['value'])

        g = rdt.to_granule()

        publisher.publish(g)

        # Give the logger process time to receive the granule.
        gevent.sleep(3)
class TestTransformWorkerSubscriptions(IonIntegrationTestCase):
    """Integration tests for data processes fed by separate or chained streams.

    Data process one writes conductivity + pressure into salinity; data
    process two writes salinity + pressure into conductivity. A validator
    subscriber checks the output granules and sets ``event1_verified`` /
    ``event2_verified``, which the tests wait on.
    """

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create service clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    # ------------------------------------------------------------------
    # Private helpers shared by both tests
    # ------------------------------------------------------------------

    def _init_stream_definition(self):
        """Reset per-test state and create the shared stream definition."""
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

    def _create_input_data_product(self, name, description):
        """Create an input data product on the shared stream definition; return its id."""
        input_dp_obj = IonObject(
            RT.DataProduct,
            name=name,
            description=description,
            temporal_domain=self.time_dom.dump(),
            spatial_domain=self.spatial_dom.dump())
        return self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    def _stream_of(self, data_product_id):
        """Return the id of the first Stream associated with ``data_product_id``."""
        stream_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasStream, RT.Stream, True)
        return stream_ids[0]

    def _subscription_of(self, data_process_id):
        """Return (and log) the first Subscription object of ``data_process_id``."""
        subscription_objs, _ = self.rrclient.find_objects(subject=data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s', subscription_objs[0])
        return subscription_objs[0]

    def _activate_new_subscription(self, name, stream_ids, exchange_name):
        """Create and activate a subscription, registering deactivate/delete cleanups.

        :returns: the subscription id
        """
        subscription_id = self.pubsub_client.create_subscription(name=name, stream_ids=stream_ids, exchange_name=exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)
        return subscription_id

    def _publisher_for(self, stream_id):
        """Return a StandaloneStreamPublisher bound to ``stream_id``'s route."""
        stream_route = self.pubsub_client.read_stream_route(stream_id)
        return StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)

    def _publish_sample(self, publisher, stream_id, conductivity, pressure, salinity):
        """Publish a single-record granule with the given values on ``stream_id``."""
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [conductivity]
        rdt['pressure'] = [pressure]
        rdt['salinity'] = [salinity]
        publisher.publish(msg=rdt.to_granule(), stream_id=stream_id)

    def _create_output_data_product(self, stream_def_name, data_product_name):
        """Create one output data product, registering its deletion as a cleanup.

        :returns: tuple (data product id, id of its output stream)
        """
        stream_def_id = self.pubsub_client.create_stream_definition(name=stream_def_name, parameter_dictionary_id=self.parameter_dict_id)

        dp_obj = IonObject(
            RT.DataProduct,
            name=data_product_name,
            description='output of add array func',
            temporal_domain=self.time_dom.dump(),
            spatial_domain=self.spatial_dom.dump())

        dp_id = self.dataproductclient.create_data_product(dp_obj, stream_def_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp_id)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True)
        return dp_id, stream_ids[0]

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_multi_subscriptions(self):
        """Two data processes, each fed by its own input stream, both produce output."""
        self._init_stream_definition()

        # create the DataProducts
        self.input_dp_one_id = self._create_input_data_product('input_data_product_one', 'input test stream one')
        self.input_dp_two_id = self._create_input_data_product('input_data_product_two', 'input test stream two')

        # retrieve the Stream for each data product
        self.stream_one_id = self._stream_of(self.input_dp_one_id)
        self.stream_two_id = self._stream_of(self.input_dp_two_id)

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
        first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id)
        second_dp_id = self.create_data_process_two(dpd_id, self.input_dp_two_id, dp2_func_output_dp_id)

        # retrieve subscription from data process one
        subscription_obj = self._subscription_of(first_dp_id)

        # subscribe stream ONE onto the exchange used by data process one
        self.subscription_one_id = self._activate_new_subscription(
            'parsed_subscription_one', [self.stream_one_id], subscription_obj.exchange_name)
        self.publisher_one = self._publisher_for(self.stream_one_id)

        self.start_event_listener()

        # data process 1 adds conductivity + pressure and puts the result in salinity
        self._publish_sample(self.publisher_one, self.stream_one_id, conductivity=1, pressure=2, salinity=8)

        # retrieve subscription from data process two
        subscription_obj = self._subscription_of(second_dp_id)

        # subscribe stream TWO onto the exchange used by data process two
        self.subscription_two_id = self._activate_new_subscription(
            'parsed_subscription_one_two', [self.stream_two_id], subscription_obj.exchange_name)
        self.publisher_two = self._publisher_for(self.stream_two_id)

        # data process 1 adds conductivity + pressure and puts the result in salinity
        self._publish_sample(self.publisher_one, self.stream_one_id, conductivity=1, pressure=2, salinity=8)

        # data process 2 adds salinity + pressure and puts the result in conductivity
        self._publish_sample(self.publisher_two, self.stream_two_id, conductivity=22, pressure=4, salinity=1)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_two_transforms_inline(self):
        """Chain two data processes: the output of the first feeds the second."""
        self._init_stream_definition()

        # create the DataProduct
        self.input_dp_one_id = self._create_input_data_product('input_data_product_one', 'input test stream one')

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
        first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id)
        second_dp_id = self.create_data_process_two(dpd_id, dp1_func_output_dp_id, dp2_func_output_dp_id)

        # retrieve subscription from data process one
        subscription_obj = self._subscription_of(first_dp_id)

        # retrieve the Stream for these data products
        self.stream_one_id = self._stream_of(self.input_dp_one_id)
        # the input to data process two is the output from data process one
        self.stream_two_id = self._stream_of(dp1_func_output_dp_id)

        # Run provenance on the output dataproduct of the second data process to see all the links
        # are as expected
        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(dp2_func_output_dp_id)

        # Basic check: the provenance graph must hold exactly 3 entries.
        self.assertEqual(len(output_data_product_provenance), 3)
        # confirm that the linking from the output dataproduct to input dataproduct is correct
        self.assertIn(dp1_func_output_dp_id, output_data_product_provenance[dp2_func_output_dp_id]['parents'])
        self.assertIn(self.input_dp_one_id, output_data_product_provenance[dp1_func_output_dp_id]['parents'])

        # subscribe streams ONE and TWO onto the exchange used by data process one
        self._activate_new_subscription(
            'parsed_subscription', [self.stream_one_id, self.stream_two_id], subscription_obj.exchange_name)
        self.publisher_one = self._publisher_for(self.stream_one_id)

        # retrieve subscription from data process two (only logged)
        self._subscription_of(second_dp_id)

        # data process 1 adds conductivity + pressure and puts the result in salinity
        # data process 2 adds salinity + pressure and puts the result in conductivity

        self.start_event_listener()

        self._publish_sample(self.publisher_one, self.stream_one_id, conductivity=1, pressure=2, salinity=8)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))

    # ------------------------------------------------------------------
    # Resource builders
    # ------------------------------------------------------------------

    def create_data_process_definition(self):
        """Create the 'add_arrays' transform function and data process definition.

        :returns: the data process definition id
        """
        # two data processes using one transform and one DPD

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')
        add_array_func_id, _ = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS)
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        return add_array_dpd_id

    def create_data_process_one(self, data_process_definition_id, output_dataproduct):
        """Create data process one (conductivity + pressure -> salinity).

        Reads from ``self.input_dp_one_id``; the process is registered, added
        to ``self.dp_list`` and deleted on cleanup.

        :returns: the data process id
        """
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"

        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=data_process_definition_id,
            inputs=[self.input_dp_one_id],
            outputs=[output_dataproduct],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        self.dp_list.append(dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process_two(self, data_process_definition_id, input_dataproduct, output_dataproduct):
        """Create data process two (salinity + pressure -> conductivity).

        The process is registered, added to ``self.dp_list`` and deleted on
        cleanup.

        :returns: the data process id
        """
        argument_map = {'arr1': 'salinity', 'arr2': 'pressure'}
        output_param = 'conductivity'

        dp2_func_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=data_process_definition_id,
            inputs=[input_dataproduct],
            outputs=[output_dataproduct],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp2_func_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, dp2_func_data_process_id)

        self.dp_list.append(dp2_func_data_process_id)

        return dp2_func_data_process_id

    def create_output_data_products(self):
        """Create both output data products and start a validator on them.

        Stores the output stream ids in ``self._output_stream_one_id`` and
        ``self._output_stream_two_id``.

        :returns: tuple (dp1 output data product id, dp2 output data product id)
        """
        dp1_func_output_dp_id, self._output_stream_one_id = self._create_output_data_product('dp1_stream', 'data_process1_data_product')
        dp2_func_output_dp_id, self._output_stream_two_id = self._create_output_data_product('dp2_stream', 'data_process2_data_product')

        subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s', stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)

        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)

        return dp1_func_output_dp_id, dp2_func_output_dp_id

    # ------------------------------------------------------------------
    # Callbacks
    # ------------------------------------------------------------------

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__))

        # if data process already created, check origin
        if 'data process assigned to transform worker' not in data_process_event.description:
            self.assertIn(data_process_event.origin, self.dp_list)

    def validate_output_granule(self, msg, route, stream_id):
        """Check an output granule and set the matching verification event.

        Stream one must carry salinity == [3]; stream two must carry
        conductivity == [5].
        """
        self.assertIn(stream_id, [self._output_stream_one_id, self._output_stream_two_id])

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  stream_id: %s', stream_id)

        if stream_id == self._output_stream_one_id:
            sal_val = rdt['salinity']
            log.debug('validate_output_granule  sal_val: %s', sal_val)
            np.testing.assert_array_equal(sal_val, np.array([3]))
            self.event1_verified.set()
        else:
            cond_val = rdt['conductivity']
            log.debug('validate_output_granule  cond_val: %s', cond_val)
            np.testing.assert_array_equal(cond_val, np.array([5]))
            self.event2_verified.set()

    def start_event_listener(self):
        """Subscribe ``validate_event`` to DataProcessStatusEvent; stopped on cleanup."""
        es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)
class ExternalDatasetAgentTestBase(object):
    """
    Shared integration test cases for the external dataset agent (EDA).

    This class derives from ``object`` and is meant to be combined with a
    test-case class that supplies ``_start_container``, ``addCleanup`` and
    the ``assert*`` methods used below.  Besides ``_setup_resources`` (which
    raises NotImplementedError here), the tests read ``self.DVR_CONFIG``,
    ``self.HIST_CONSTRAINTS_1`` and ``self.HIST_CONSTRAINTS_2``, none of
    which are defined in this class.
    """

    # Agent parameters.
    EDA_RESOURCE_ID = '123xyz'
    EDA_NAME = 'ExampleEDA'
    EDA_MOD = 'ion.agents.data.external_dataset_agent'
    EDA_CLS = 'ExternalDatasetAgent'

    def setUp(self):
        """
        Initialize test members.

        Starts the container and the r2deploy services, creates the pubsub
        and container-agent clients, starts the 'finished' event subscriber,
        lets the subclass set up its resources, then spawns the agent process
        and creates a resource agent client for it.
        """
        # Start container.
        self._start_container()

        # Bring up services in a deploy file
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Create a pubsub client to create streams.
        self._pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        self._container_client = ContainerAgentClient(node=self.container.node, name=self.container.name)

        # Data async and subscription  TODO: Replace with new subscriber
        self._finished_count = None
        # TODO: Switch to gevent.queue.Queue
        self._async_finished_result = AsyncResult()
        self._finished_events_received = []
        self._finished_event_subscriber = None
        self._start_finished_event_subscriber()
        self.addCleanup(self._stop_finished_event_subscriber)

        # TODO: Finish dealing with the resources and whatnot
        # TODO: DVR_CONFIG and (potentially) stream_config could both be reconfigured in self._setup_resources()
        self._setup_resources()

        # TG: Setup/configure the granule logger to log granules as they're published

        # Create agent config.
        agent_config = {
            'driver_config': self.DVR_CONFIG,
            'stream_config': {},
            'agent': {'resource_id': self.EDA_RESOURCE_ID},
            'test_mode': True
        }

        # Start instrument agent.
        # The attribute is created before spawning so it exists even if
        # spawn_process raises.
        self._ia_pid = None
        log.debug('TestInstrumentAgent.setup(): starting EDA.')
        self._ia_pid = self._container_client.spawn_process(
            name=self.EDA_NAME,
            module=self.EDA_MOD,
            cls=self.EDA_CLS,
            config=agent_config
        )
        log.info('Agent pid=%s.', str(self._ia_pid))

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = None
        self._ia_client = ResourceAgentClient(self.EDA_RESOURCE_ID, process=FakeProcess())
        log.info('Got ia client %s.', str(self._ia_client))

    ########################################
    # Private "setup" functions
    ########################################

    def _setup_resources(self):
        """
        Hook called from setUp before the agent config is built.

        Must be overridden; this base implementation always raises.
        """
        raise NotImplementedError('_setup_resources must be implemented in the subclass')

    def create_stream_and_logger(self, name, stream_id='', pdict=None):
        """
        Obtain a stream and spawn a StreamGranuleLogger process for it.

        When ``stream_id`` is empty a new stream called ``name`` is created on
        the 'science_data' exchange point (with a new stream definition built
        from ``pdict.dump()`` if ``pdict`` is given); otherwise the route and
        stream definition of the existing stream are read back.

        @param name         name of the new stream; also prefixes the logger process name
        @param stream_id    id of an existing stream, or empty to create one
        @param pdict        optional parameter dictionary used for the new stream definition
        @retval tuple (stream_id, route, stream_def_id)
        """
        stream_def_id = ''
        # Truthiness already covers the empty string; the former
        # "stream_id is ''" identity test against a literal was redundant.
        if not stream_id:
            if pdict:
                stream_def_id = self._pubsub_client.create_stream_definition(parameter_dictionary=pdict.dump(), stream_type='stream')
            stream_id, route = self._pubsub_client.create_stream(name=name, exchange_point='science_data', stream_definition_id=stream_def_id)
        else:
            route = self._pubsub_client.read_stream_route(stream_id=stream_id)
            stream_def = self._pubsub_client.read_stream_definition(stream_id=stream_id)
            stream_def_id = stream_def._id

        pid = self._container_client.spawn_process(
            name=name + '_logger',
            module='ion.processes.data.stream_granule_logger',
            cls='StreamGranuleLogger',
            config={'process': {'stream_id': stream_id}}
        )
        log.info('Started StreamGranuleLogger \'{0}\' subscribed to stream_id={1}'.format(pid, stream_id))

        return stream_id, route, stream_def_id

    def _start_finished_event_subscriber(self):
        """
        Start an EventSubscriber for 'DeviceEvent' events.

        Events whose description is 'TestingFinished' are collected in
        ``self._finished_events_received``; once their number equals
        ``self._finished_count`` the count is set on
        ``self._async_finished_result``.
        """
        def consume_event(*args, **kwargs):
            if args[0].description == 'TestingFinished':
                log.debug('TestingFinished event received')
                self._finished_events_received.append(args[0])
                if self._finished_count and self._finished_count == len(self._finished_events_received):
                    log.debug('Finishing test...')
                    self._async_finished_result.set(len(self._finished_events_received))
                    log.debug('Called self._async_finished_result.set({0})'.format(len(self._finished_events_received)))

        self._finished_event_subscriber = EventSubscriber(event_type='DeviceEvent', callback=consume_event)
        self._finished_event_subscriber.start()

    def _stop_finished_event_subscriber(self):
        """
        Stop and forget the 'finished' event subscriber, if one is running.
        """
        if self._finished_event_subscriber:
            self._finished_event_subscriber.stop()
            self._finished_event_subscriber = None

    ########################################
    # Private agent-driving helpers
    ########################################

    def _assert_agent_state(self, expected_state):
        """
        Assert that the agent currently reports expected_state.
        """
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, expected_state)

    def _execute_agent_event(self, event, expected_state):
        """
        Send an agent command for event and assert the resulting agent state.
        """
        cmd = AgentCommand(command=event)
        self._ia_client.execute_agent(cmd)
        self._assert_agent_state(expected_state)

    def _execute_resource_event(self, event, expected_state):
        """
        Send a resource command for event and assert the resulting agent state.
        """
        cmd = AgentCommand(command=event)
        self._ia_client.execute_resource(cmd)
        self._assert_agent_state(expected_state)

    def _enter_command_state(self):
        """
        Drive the agent INITIALIZE -> GO_ACTIVE -> RUN, asserting the
        INACTIVE, IDLE and COMMAND states after the respective steps.
        """
        self._execute_agent_event(ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE)
        self._execute_agent_event(ResourceAgentEvent.GO_ACTIVE, ResourceAgentState.IDLE)
        self._execute_agent_event(ResourceAgentEvent.RUN, ResourceAgentState.COMMAND)

    def _reset_agent(self):
        """
        Send RESET and assert the agent is back in the UNINITIALIZED state.
        """
        self._execute_agent_event(ResourceAgentEvent.RESET, ResourceAgentState.UNINITIALIZED)

    ########################################
    # Custom assertion functions
    ########################################

    def assertListsEqual(self, lst1, lst2):
        """
        Assert that both iterables hold the same items, ignoring order.

        NOTE(review): this helper used to return the comparison result
        without asserting it, so a mismatch never failed a test; it also
        sorted the callers' lists in place.  It now fails on mismatch and
        leaves its arguments untouched.  Tests that were silently comparing
        unequal lists will start failing.

        @retval True, kept for callers that inspect the return value
        """
        self.assertEqual(sorted(lst1), sorted(lst2))
        return True

    def assertSampleDict(self, val):
        """
        Verify the value is a sample dictionary for the sbe37.
        """
        # {'p': [-6.945], 'c': [0.08707], 't': [20.002], 'time': [1333752198.450622]}
        self.assertIsInstance(val, dict)
        for key in ('c', 't', 'p', 'time'):
            self.assertIn(key, val)
        # Only the first entry of each list is type-checked.
        for key in ('c', 't', 'p', 'time'):
            self.assertIsInstance(val[key][0], float)

    def assertParamDict(self, pd, all_params=False):
        """
        Verify all device parameters exist and are correct type.

        @param pd           dict of parameter name -> value to check
        @param all_params   when True, pd must hold exactly the keys of PARAMS;
                            otherwise every key of pd must be a key of PARAMS
        """
        if all_params:
            self.assertEqual(set(pd.keys()), set(PARAMS.keys()))
            for (key, type_val) in PARAMS.items():
                if type_val in (list, tuple):
                    # list and tuple are accepted interchangeably.
                    self.assertIsInstance(pd[key], (list, tuple))
                else:
                    self.assertIsInstance(pd[key], type_val)

        else:
            for (key, val) in pd.items():
                self.assertIn(key, PARAMS)
                self.assertIsInstance(val, PARAMS[key])

    def assertParamVals(self, params, correct_params):
        """
        Verify parameters take the correct values.

        @param params           dict of parameter name -> actual value
        @param correct_params   dict of parameter name -> expected value
        """
        self.assertEqual(set(params.keys()), set(correct_params.keys()))
        for (key, val) in params.items():
            correct_val = correct_params[key]
            if isinstance(val, float):
                # Verify to 1% of the larger value.
                max_val = max(abs(val), abs(correct_val))
                self.assertAlmostEqual(val, correct_val, delta=max_val * .01)

            elif isinstance(val, (list, tuple)):
                # list or tuple: compared as lists.
                self.assertEqual(list(val), list(correct_val))

            else:
                # int, bool, str.
                self.assertEqual(val, correct_val)

    ########################################
    # Test functions
    ########################################

    def test_acquire_data_while_streaming(self):
        # Test instrument driver execute interface to start and stop streaming mode.
        self._assert_agent_state(ResourceAgentState.UNINITIALIZED)
        self._enter_command_state()

        params = {
            'POLLING_INTERVAL': 3
        }
        self._ia_client.set_resource(params)

        self._finished_count = 1

        self._execute_resource_event(DriverEvent.START_AUTOSAMPLE, ResourceAgentState.STREAMING)

        config = get_safe(self.DVR_CONFIG, 'dh_cfg', {})

        log.info('Send a constrained request for data: constraints = HIST_CONSTRAINTS_1')
        config['stream_id'], config['stream_route'], _ = self.create_stream_and_logger(name='stream_id_for_historical_1')
        config['constraints'] = self.HIST_CONSTRAINTS_1
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE, args=[config])
        self._ia_client.execute_resource(cmd)

        self._execute_resource_event(DriverEvent.STOP_AUTOSAMPLE, ResourceAgentState.COMMAND)

        finished = self._async_finished_result.get(timeout=120)
        self.assertEqual(finished, self._finished_count)

        self._reset_agent()

    def test_acquire_data(self):
        self._enter_command_state()

        log.warn('Send an unconstrained request for data (\'new data\')')
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE)
        self._ia_client.execute_resource(command=cmd)
        state = self._ia_client.get_agent_state()
        log.info(state)
        self.assertEqual(state, ResourceAgentState.COMMAND)

        self._finished_count = 2

        config_mods = {}

        log.info('Send a constrained request for data: constraints = HIST_CONSTRAINTS_1')
        config_mods['stream_id'], config_mods['stream_route'], _ = self.create_stream_and_logger(name='stream_id_for_historical_1')
        config_mods['constraints'] = self.HIST_CONSTRAINTS_1
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE, args=[config_mods])
        self._ia_client.execute_resource(cmd)
        self._assert_agent_state(ResourceAgentState.COMMAND)

        log.info('Send a second constrained request for data: constraints = HIST_CONSTRAINTS_2')
        config_mods['stream_id'], config_mods['stream_route'], _ = self.create_stream_and_logger(name='stream_id_for_historical_2')
        config_mods['constraints'] = self.HIST_CONSTRAINTS_2
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE, args=[config_mods])
        self._ia_client.execute_resource(cmd)
        self._assert_agent_state(ResourceAgentState.COMMAND)

        finished = self._async_finished_result.get(timeout=120)
        self.assertEqual(finished, self._finished_count)

        self._reset_agent()

    def test_streaming(self):
        self._assert_agent_state(ResourceAgentState.UNINITIALIZED)
        self._enter_command_state()

        params = {
            'POLLING_INTERVAL': 3
        }
        self._ia_client.set_resource(params)

        self._finished_count = 3

        self._execute_resource_event(DriverEvent.START_AUTOSAMPLE, ResourceAgentState.STREAMING)

        # Assert that data was received (currently disabled):
        #        self._async_finished_result.get(timeout=600)
        #        self.assertTrue(len(self._finished_events_received) >= 3)

        self._execute_resource_event(DriverEvent.STOP_AUTOSAMPLE, ResourceAgentState.COMMAND)

        self._reset_agent()

    def test_command(self):
        # Test instrument driver get and set interface.
        self._assert_agent_state(ResourceAgentState.UNINITIALIZED)
        self._enter_command_state()

        # Retrieve all resource parameters.
        reply = self._ia_client.get_resource(params=['DRIVER_PARAMETER_ALL'])
        self.assertParamDict(reply, True)

        # Retrieve a subset of resource parameters.
        params = [
            'POLLING_INTERVAL'
        ]
        reply = self._ia_client.get_resource(params=params)
        self.assertParamDict(reply)
        orig_params = reply

        # Set a subset of resource parameters.
        new_params = {
            'POLLING_INTERVAL': (orig_params['POLLING_INTERVAL'] * 2),
        }
        self._ia_client.set_resource(params=new_params)
        check_new_params = self._ia_client.get_resource(params)
        self.assertParamVals(check_new_params, new_params)

        self._reset_agent()

    def test_get_set_resource(self):
        # The transitions are deliberately not state-checked in this test.
        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)

        # Get a couple parameters
        retval = self._ia_client.get_resource(['POLLING_INTERVAL', 'PATCHABLE_CONFIG_KEYS'])
        log.debug('Retrieved parameters from agent: {0}'.format(retval))
        self.assertTrue(isinstance(retval, dict))
        self.assertEqual(type(retval['POLLING_INTERVAL']), int)
        self.assertEqual(type(retval['PATCHABLE_CONFIG_KEYS']), list)

        # Attempt to get a parameter that doesn't exist
        log.debug('Try getting a non-existent parameter \'BAD_PARAM\'')
        with self.assertRaises(ServerError):
            self._ia_client.get_resource(['BAD_PARAM'])

        # Set the polling_interval to a new value, then get it to make sure it set properly
        self._ia_client.set_resource({'POLLING_INTERVAL': 10})
        retval = self._ia_client.get_resource(['POLLING_INTERVAL'])
        log.debug('Retrieved parameters from agent: {0}'.format(retval))
        self.assertTrue(isinstance(retval, dict))
        self.assertEqual(retval['POLLING_INTERVAL'], 10)

        # Attempt to set a parameter that doesn't exist
        log.debug('Try setting a non-existent parameter \'BAD_PARAM\'')
        with self.assertRaises(ServerError):
            self._ia_client.set_resource({'BAD_PARAM': 'bad_val'})

        # Attempt to set one parameter that does exist, and one that doesn't
        with self.assertRaises(ServerError):
            self._ia_client.set_resource({'POLLING_INTERVAL': 20, 'BAD_PARAM': 'bad_val'})

        # NOTE(review): the value 20 is expected even though the combined set
        # call raised -- confirm this partial-update behaviour is intended.
        retval = self._ia_client.get_resource(['POLLING_INTERVAL'])
        log.debug('Retrieved parameters from agent: {0}'.format(retval))
        self.assertTrue(isinstance(retval, dict))
        self.assertEqual(retval['POLLING_INTERVAL'], 20)

        self._reset_agent()

    def test_initialize(self):
        # Test agent initialize command. This causes creation of driver process and transition to inactive.

        # We start in uninitialized state.
        # In this state there is no driver process.
        self._assert_agent_state(ResourceAgentState.UNINITIALIZED)

        # Initialize the agent.
        # The agent is spawned with a driver config, but you can pass one in
        # optionally with the initialize command. This validates the driver
        # config, launches a driver process and connects to it via messaging.
        # If successful, we switch to the inactive state.
        self._execute_agent_event(ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE)

        # Reset the agent. This causes the driver messaging to be stopped,
        # the driver process to end and switches us back to uninitialized.
        self._reset_agent()

    def test_states(self):
        # Test agent state transitions.
        self._assert_agent_state(ResourceAgentState.UNINITIALIZED)

        # (agent event, state expected afterwards), executed in order.
        agent_transitions = [
            (ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE),
            (ResourceAgentEvent.GO_ACTIVE, ResourceAgentState.IDLE),
            (ResourceAgentEvent.RUN, ResourceAgentState.COMMAND),
            (ResourceAgentEvent.PAUSE, ResourceAgentState.STOPPED),
            (ResourceAgentEvent.RESUME, ResourceAgentState.COMMAND),
            (ResourceAgentEvent.CLEAR, ResourceAgentState.IDLE),
            (ResourceAgentEvent.RUN, ResourceAgentState.COMMAND),
            (ResourceAgentEvent.PAUSE, ResourceAgentState.STOPPED),
            (ResourceAgentEvent.CLEAR, ResourceAgentState.IDLE),
            (ResourceAgentEvent.RUN, ResourceAgentState.COMMAND),
        ]
        for event, expected_state in agent_transitions:
            self._execute_agent_event(event, expected_state)

        self._execute_resource_event(DriverEvent.START_AUTOSAMPLE, ResourceAgentState.STREAMING)
        self._execute_resource_event(DriverEvent.STOP_AUTOSAMPLE, ResourceAgentState.COMMAND)

        self._reset_agent()

    def test_capabilities(self):
        """
        Test the ability to retrieve agent and resource parameter and command capabilities in various system states.
        """
        # Test the ability to retrieve agent and resource parameter and command capabilities.
        acmds = self._ia_client.get_capabilities(['AGT_CMD'])
        log.debug('Agent Commands: {0}'.format(acmds))
        self.assertListsEqual(acmds, AGT_CMDS.keys())
        apars = self._ia_client.get_capabilities(['AGT_PAR'])
        log.debug('Agent Parameters: {0}'.format(apars))

        self._assert_agent_state(ResourceAgentState.UNINITIALIZED)

        self._execute_agent_event(ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE)

        rcmds = self._ia_client.get_capabilities(['RES_CMD'])
        log.debug('Resource Commands: {0}'.format(rcmds))
        self.assertListsEqual(rcmds, CMDS.keys())

        rpars = self._ia_client.get_capabilities(['RES_PAR'])
        log.debug('Resource Parameters: {0}'.format(rpars))
        self.assertListsEqual(rpars, PARAMS.keys())

        self._reset_agent()

    def test_errors(self):
        # Test illegal behavior and replies.
        self._assert_agent_state(ResourceAgentState.UNINITIALIZED)

        # Can't go active in uninitialized state.
        # Status 660 is state error.
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        with self.assertRaises(Conflict):
            self._ia_client.execute_agent(cmd)

        # Can't command driver in this state.
        cmd = AgentCommand(command=DriverEvent.ACQUIRE_SAMPLE)
        with self.assertRaises(Conflict):
            self._ia_client.execute_resource(cmd)

        self._enter_command_state()

        # 404 unknown agent command.
        cmd = AgentCommand(command='kiss_edward')
        with self.assertRaises(BadRequest):
            self._ia_client.execute_agent(cmd)

        # 670 unknown driver command.
        cmd = AgentCommand(command='acquire_sample_please')
        with self.assertRaises(ServerError):
            self._ia_client.execute_resource(cmd)

        # 630 Parameter error (check currently disabled):
        #self.assertRaises(InstParameterError, self._ia_client.get_param, 'bogus bogus')

        self._reset_agent()
class TestGranulePublish(IonIntegrationTestCase):
    """
    Integration test that publishes a single CTD granule on the stream of a
    newly created data product while a StreamGranuleLogger process,
    configured with that stream's id, is scheduled.
    """

    def setUp(self):
        """
        Start the container with the r2deploy services and create the
        service clients used by the test.
        """
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()

    def create_logger(self, name, stream_id=''):
        """
        Create a process definition for a StreamGranuleLogger and schedule it.

        @param name         prefix of the process definition name ('<name>_logger')
        @param stream_id    stream id placed in the process configuration
        @retval the id returned by schedule_process
        """
        # logger process
        producer_definition = ProcessDefinition(name=name+'_logger')
        producer_definition.executable = {
            'module':'ion.processes.data.stream_granule_logger',
            'class':'StreamGranuleLogger'
        }

        logger_procdef_id = self.processdispatchclient.create_process_definition(process_definition=producer_definition)
        configuration = {
            'process':{
                'stream_id':stream_id,
            }
        }
        pid = self.processdispatchclient.schedule_process(process_definition_id=logger_procdef_id, configuration=configuration)

        return pid

    #overriding trigger function here to use new granule
    def test_granule_publish(self):
        """
        Build a granule from one sample of CTD values and publish it on the
        data product's stream.
        """
        log.debug("test_granule_publish ")
        self.loggerpids = []

        #retrieve the param dict from the repository
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
            name='the parsed data',
            description='ctd stream test',
            temporal_domain = tdom.dump(),
            spatial_domain = sdom.dump())

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)

        # Retrieve the id of the output stream of the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug( 'test_granule_publish: Data product streams1 = %s', stream_ids)

        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)

        # From here on a logger process is scheduled; the finally clause
        # cancels it even when building or publishing the granule fails.
        try:
            rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

            #create the publisher from the stream route
            stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
            publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

            # this is one sample from the ctd driver
            tomato = {"driver_timestamp": 3555971105.1268806, "instrument_id": "ABC-123", "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 22.9304, "value_id": "temp"}, {"value": 51.57381, "value_id": "conductivity"}, {"value": 915.551, "value_id": "pressure"}]}

            for value in tomato['values']:
                # value_id is logged as the key and value as the val, matching
                # the labels in the message.
                log.debug("test_granule_publish: Looping tomato values key: %s val: %s ", str(value['value_id']), str(value['value']))

                # Only fields present in the record dictionary are copied over.
                if value['value_id'] in rdt:
                    rdt[value['value_id']] = numpy.array( [ value['value'] ] )
                    log.debug("test_granule_publish: Added data item %s val: %s ", str(value['value_id']), str(value['value']) )

            g = rdt.to_granule()

            publisher.publish(g)

            # presumably gives the logger process time to receive the granule -- TODO confirm
            time.sleep(3)
        finally:
            for pid in self.loggerpids:
                self.processdispatchclient.cancel_process(pid)
class TestOmsLaunch(IonIntegrationTestCase):
    """
    Integration test that builds the ION resources (sites, devices, agents,
    data products and agent instances) for a platform network obtained from
    RSN OMS, launches the base platform agent and drives it through its
    command sequence while collecting published data samples and events.
    """

    def setUp(self):
        """
        Starts the container and services, creates the service clients,
        retrieves the network definition and registers subscriber cleanup.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        node = self.container.node
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.omsclient = ObservatoryManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.dpclient = DataProductManagementServiceClient(node=node)
        self.pubsubcli = PubsubManagementServiceClient(node=node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.dataset_management = DatasetManagementServiceClient()

        # Use the network definition provided by RSN OMS directly.
        rsn_oms = CIOMSClientFactory.create_instance(DVR_CONFIG['oms_uri'])
        self._network_definition = RsnOmsUtil.build_network_definition(rsn_oms)

        # get serialized version for the configuration:
        self._network_definition_ser = NetworkUtil.serialize_network_definition(
            self._network_definition)
        if log.isEnabledFor(logging.DEBUG):
            log.debug("NetworkDefinition serialization:\n%s",
                      self._network_definition_ser)

        self.platformModel_id = None

        # platform_id -> dict of associated objects (see _prepare_platform)
        self.all_platforms = {}
        # platform_id -> stream configuration (see _build_stream_config)
        self.agent_streamconfig_map = {}

        self._async_data_result = AsyncResult()
        self._data_subscribers = []
        self._samples_received = []
        self.addCleanup(self._stop_data_subscribers)

        self._async_event_result = AsyncResult()
        self._event_subscribers = []
        self._events_received = []
        self.addCleanup(self._stop_event_subscribers)
        self._start_event_subscriber()

        self._set_up_DataProduct_obj()
        self._set_up_PlatformModel_obj()

    def _set_up_DataProduct_obj(self):
        """
        Creates the stream definition and the DataProduct object template
        that is reused (renamed per platform) in _create_stream_config.
        """
        # Create data product object to be used for each of the platform log streams
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        self.pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'platform_eng_parsed', id_only=True)
        self.platform_eng_stream_def_id = self.pubsubcli.create_stream_definition(
            name='platform_eng', parameter_dictionary_id=self.pdict_id)
        self.dp_obj = IonObject(RT.DataProduct,
                                name='platform_eng data',
                                description='platform_eng test',
                                temporal_domain=tdom,
                                spatial_domain=sdom)

    def _set_up_PlatformModel_obj(self):
        """
        Creates the PlatformModel shared by all platform devices and agents;
        fails the test if the service rejects the request.
        """
        # Create PlatformModel
        platformModel_obj = IonObject(RT.PlatformModel,
                                      name='RSNPlatformModel',
                                      description="RSNPlatformModel")
        try:
            self.platformModel_id = self.imsclient.create_platform_model(
                platformModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new PLatformModel: %s" % ex)

        log.debug('new PlatformModel id = %s', self.platformModel_id)

    def _traverse(self, pnode, platform_id, parent_platform_objs=None):
        """
        Recursive routine that repeatedly calls _prepare_platform to build
        the object dictionary for each platform.

        @param pnode PlatformNode
        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval the dict returned by _prepare_platform at this level.
        """
        log.info("Starting _traverse for %r", platform_id)

        plat_objs = self._prepare_platform(pnode, platform_id,
                                           parent_platform_objs)
        self.all_platforms[platform_id] = plat_objs

        # now, traverse the children:
        for sub_pnode in pnode.subplatforms.itervalues():
            subplatform_id = sub_pnode.platform_id
            self._traverse(sub_pnode, subplatform_id, plat_objs)

        return plat_objs

    def _prepare_platform(self, pnode, platform_id, parent_platform_objs):
        """
        This routine generalizes the manual construction originally done in
        test_oms_launch.py. It is called by the recursive _traverse method so
        all platforms starting from a given base platform are prepared.

        Note: For simplicity in this test, sites are organized in the same
        hierarchical way as the platforms themselves.

        @param pnode PlatformNode
        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval a dict of associated objects similar to those in
                test_oms_launch
        """
        site__obj = IonObject(RT.PlatformSite,
                              name='%s_PlatformSite' % platform_id,
                              description='%s_PlatformSite platform site' % platform_id)
        site_id = self.omsclient.create_platform_site(site__obj)

        if parent_platform_objs:
            # establish hasSite association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['site_id'],
                predicate=PRED.hasSite,
                object=site_id)

        # prepare platform attributes and ports:
        # (the results are currently only referenced by the disabled
        # arguments below; the calls are kept as in the original flow)
        monitor_attribute_objs, monitor_attribute_dicts = \
            self._prepare_platform_attributes(pnode, platform_id)
        port_objs, port_dicts = self._prepare_platform_ports(pnode, platform_id)

        device__obj = IonObject(
            RT.PlatformDevice,
            name='%s_PlatformDevice' % platform_id,
            description='%s_PlatformDevice platform device' % platform_id,
            # ports=port_objs,
            # platform_monitor_attributes = monitor_attribute_objs
        )

        device_id = self.imsclient.create_platform_device(device__obj)
        # self.device_id always refers to the most recently created device;
        # it is still read by _create_launch_verify.
        self.device_id = device_id

        self.imsclient.assign_platform_model_to_platform_device(
            self.platformModel_id, device_id)
        self.rrclient.create_association(subject=site_id,
                                         predicate=PRED.hasDevice,
                                         object=device_id)
        self.damsclient.register_instrument(instrument_id=device_id)

        if parent_platform_objs:
            # establish hasDevice association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['device_id'],
                predicate=PRED.hasDevice,
                object=device_id)

        agent__obj = IonObject(RT.PlatformAgent,
                               name='%s_PlatformAgent' % platform_id,
                               description='%s_PlatformAgent platform agent' % platform_id)
        agent_id = self.imsclient.create_platform_agent(agent__obj)

        if parent_platform_objs:
            # add this platform_id to parent's children:
            parent_platform_objs['children'].append(platform_id)

        self.imsclient.assign_platform_model_to_platform_agent(
            self.platformModel_id, agent_id)

        # agent_instance_obj = IonObject(RT.PlatformAgentInstance,
        #     name='%s_PlatformAgentInstance' % platform_id,
        #     description="%s_PlatformAgentInstance" % platform_id)
        #
        # agent_instance_id = self.imsclient.create_platform_agent_instance(
        #     agent_instance_obj, agent_id, device_id)

        plat_objs = {
            'platform_id': platform_id,
            'site__obj': site__obj,
            'site_id': site_id,
            'device__obj': device__obj,
            'device_id': device_id,
            'agent__obj': agent__obj,
            'agent_id': agent_id,
            # 'agent_instance_obj': agent_instance_obj,
            # 'agent_instance_id': agent_instance_id,
            'children': []
        }

        log.info("plat_objs for platform_id %r = %s", platform_id, str(plat_objs))

        stream_config = self._create_stream_config(plat_objs)
        self.agent_streamconfig_map[platform_id] = stream_config
        # self.agent_streamconfig_map[platform_id] = None
        # self._start_data_subscriber(agent_instance_id, stream_config)

        return plat_objs

    def _prepare_platform_attributes(self, pnode, platform_id):
        """
        Returns the list of PlatformMonitorAttributes objects corresponding to
        the attributes associated to the given platform, together with the
        equivalent list of plain dicts.
        """
        # TODO complete the clean-up of this method
        ret_infos = dict((n, a.defn) for (n, a) in pnode.attrs.iteritems())

        monitor_attribute_objs = []
        monitor_attribute_dicts = []
        for attrName, attrDfn in ret_infos.iteritems():
            log.debug("platform_id=%r: preparing attribute=%r",
                      platform_id, attrName)

            monitor_rate = attrDfn['monitorCycleSeconds']
            units = attrDfn['units']

            plat_attr_obj = IonObject(OT.PlatformMonitorAttributes,
                                      id=attrName,
                                      monitor_rate=monitor_rate,
                                      units=units)
            plat_attr_dict = dict(id=attrName,
                                  monitor_rate=monitor_rate,
                                  units=units)

            monitor_attribute_objs.append(plat_attr_obj)
            monitor_attribute_dicts.append(plat_attr_dict)

        return monitor_attribute_objs, monitor_attribute_dicts

    def _prepare_platform_ports(self, pnode, platform_id):
        """
        Returns the list of PlatformPort objects corresponding to the ports
        associated to the given platform, together with the equivalent list
        of plain dicts.
        """
        # TODO complete the clean-up of this method
        port_objs = []
        port_dicts = []
        for port_id, network in pnode.ports.iteritems():
            log.debug("platform_id=%r: preparing port=%r network=%s",
                      platform_id, port_id, network)

            #
            # Note: the name "IP" address has been changed to "network" address
            # in the CI-OMS interface spec.
            #
            plat_port_obj = IonObject(OT.PlatformPort,
                                      port_id=port_id,
                                      ip_address=network)
            plat_port_dict = dict(port_id=port_id, network=network)

            port_objs.append(plat_port_obj)
            port_dicts.append(plat_port_dict)

        return port_objs, port_dicts

    def _create_stream_config(self, plat_objs):
        """
        Creates the data product for the given platform, assigns it to the
        platform's device and returns the stream configuration of the data
        product's first stream.

        @param plat_objs dict as built by _prepare_platform
        @retval the dict returned by _build_stream_config
        """
        platform_id = plat_objs['platform_id']
        device_id = plat_objs['device_id']

        # create the log data product
        self.dp_obj.name = '%s platform_eng data' % platform_id
        self.data_product_id = self.dpclient.create_data_product(
            data_product=self.dp_obj,
            stream_definition_id=self.platform_eng_stream_def_id)

        # Use the device of *this* platform rather than whatever
        # self.device_id currently points to.
        self.damsclient.assign_data_product(
            input_resource_id=device_id,
            data_product_id=self.data_product_id)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(self.data_product_id,
                                                   PRED.hasStream, None, True)

        stream_config = self._build_stream_config(stream_ids[0])
        return stream_config

    def _build_stream_config(self, stream_id=''):
        """
        Builds the stream configuration dict (routing key, stream id, stream
        definition reference, exchange point and dumped parameter dictionary)
        for the given stream.
        """
        platform_eng_dictionary = DatasetManagementService.get_parameter_dictionary_by_name(
            'platform_eng_parsed')

        # get the streamroute object from pubsub by passing the stream_id
        stream_def_ids, _ = self.rrclient.find_objects(
            stream_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)

        stream_route = self.pubsubcli.read_stream_route(stream_id=stream_id)
        stream_config = {
            'routing_key': stream_route.routing_key,
            'stream_id': stream_id,
            'stream_definition_ref': stream_def_ids[0],
            'exchange_point': stream_route.exchange_point,
            'parameter_dictionary': platform_eng_dictionary.dump()
        }
        return stream_config

    def _set_platform_agent_instances(self):
        """
        Once most of the objs/defs associated with all platforms are in place,
        this method creates and associates the PlatformAgentInstance elements,
        and starts a data subscriber for each platform's stream.
        """
        self.platform_configs = {}
        for platform_id, plat_objs in self.all_platforms.iteritems():

            PLATFORM_CONFIG = {
                'platform_id': platform_id,
                'agent_streamconfig_map': None,  # self.agent_streamconfig_map,
                'driver_config': DVR_CONFIG,
                'network_definition': self._network_definition_ser
            }

            self.platform_configs[platform_id] = {
                'platform_id': platform_id,
                'agent_streamconfig_map': self.agent_streamconfig_map,
                'driver_config': DVR_CONFIG,
                'network_definition': self._network_definition_ser
            }

            agent_config = {
                'platform_config': PLATFORM_CONFIG,
            }

            # read by _start_data_subscriber below
            self.stream_id = self.agent_streamconfig_map[platform_id]['stream_id']

            # import pprint
            # print '============== platform id within unit test: %s ===========' % platform_id
            # pprint.pprint(agent_config)
            # agent_config['platform_config']['agent_streamconfig_map'] = None

            agent_instance_obj = IonObject(
                RT.PlatformAgentInstance,
                name='%s_PlatformAgentInstance' % platform_id,
                description="%s_PlatformAgentInstance" % platform_id,
                agent_config=agent_config)

            agent_id = plat_objs['agent_id']
            device_id = plat_objs['device_id']

            # Associate the instance with this platform's own device.
            # self.device_id only holds the last device created during the
            # traversal, which is wrong for every other platform in a
            # hierarchy.
            agent_instance_id = self.imsclient.create_platform_agent_instance(
                agent_instance_obj, agent_id, device_id)

            plat_objs['agent_instance_obj'] = agent_instance_obj
            plat_objs['agent_instance_id'] = agent_instance_id

            stream_config = self.agent_streamconfig_map[platform_id]
            self._start_data_subscriber(agent_instance_id, stream_config)

    def _start_data_subscriber(self, stream_name, stream_config):
        """
        Starts data subscriber for the given stream_name and stream_config.

        The stream actually subscribed to is self.stream_id, which the caller
        sets from the same stream configuration just before this call.
        """

        def consume_data(message, stream_route, stream_id):
            # A callback for processing subscribed-to data.
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            self._async_data_result.set()

        log.info('_start_data_subscriber stream_name=%r', stream_name)

        stream_id = self.stream_id  # stream_config['stream_id']

        # Create subscription for the stream
        exchange_name = '%s_queue' % stream_name
        self.container.ex_manager.create_xn_queue(exchange_name).purge()
        sub = StandaloneStreamSubscriber(exchange_name, consume_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = self.pubsubcli.create_subscription(name=exchange_name,
                                                    stream_ids=[stream_id])
        self.pubsubcli.activate_subscription(sub_id)
        sub.subscription_id = sub_id

    def _stop_data_subscribers(self):
        """
        Stop the data subscribers on cleanup.
        """
        try:
            for sub in self._data_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(
                            sub.subscription_id)
                    except Exception as ex:
                        # best effort: the subscription may not be active
                        log.warning("could not deactivate subscription %r: %s",
                                    sub.subscription_id, ex)
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._data_subscribers = []

    def _start_event_subscriber(self, event_type="DeviceEvent",
                                sub_type="platform_event"):
        """
        Starts event subscriber for events of given event_type ("DeviceEvent"
        by default) and given sub_type ("platform_event" by default).
        """

        def consume_event(evt, *args, **kwargs):
            # A callback for consuming events.
            log.info('Event subscriber received evt: %s.', str(evt))
            self._events_received.append(evt)
            self._async_event_result.set(evt)

        sub = EventSubscriber(event_type=event_type,
                              sub_type=sub_type,
                              callback=consume_event)
        sub.start()
        log.info("registered event subscriber for event_type=%r, sub_type=%r",
                 event_type, sub_type)

        self._event_subscribers.append(sub)
        sub._ready_event.wait(timeout=EVENT_TIMEOUT)

    def _stop_event_subscribers(self):
        """
        Stops the event subscribers on cleanup.
        """
        try:
            for sub in self._event_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(
                            sub.subscription_id)
                    except Exception as ex:
                        # best effort: the subscription may not be active
                        log.warning("could not deactivate subscription %r: %s",
                                    sub.subscription_id, ex)
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._event_subscribers = []

    @skip("IMS does't net implement topology")
    def test_hierarchy(self):
        self._create_launch_verify(BASE_PLATFORM_ID)

    @skip("Needs alignment with recent IMS changes")
    def test_single_platform(self):
        self._create_launch_verify('LJ01D')

    def _execute_base_platform_command(self, command, label):
        """
        Executes the given agent command on the base platform agent client
        and logs the result under the given label.

        @param command command to execute (a PlatformAgentEvent member)
        @param label   name of the command used in the log message
        @retval the value returned by execute_agent
        """
        cmd = AgentCommand(command=command)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug('Base Platform %s = %s', label, str(retval))
        return retval

    def _create_launch_verify(self, base_platform_id):
        """
        Creates the ION objects for the branch rooted at base_platform_id,
        launches the base platform agent instance, runs it through
        INITIALIZE .. START_MONITORING, waits for at least one data sample
        and one event, and then shuts the agent down again.

        @param base_platform_id ID of the platform at the root of the branch
        """
        # and trigger the traversal of the branch rooted at that base platform
        # to create corresponding ION objects and configuration dictionaries:
        pnode = self._network_definition.pnodes[base_platform_id]
        base_platform_objs = self._traverse(pnode, base_platform_id)

        # now that most of the topology information is there, add the
        # PlatformAgentInstance elements
        self._set_platform_agent_instances()

        base_platform_config = self.platform_configs[base_platform_id]

        log.info("base_platform_id = %r", base_platform_id)

        #-------------------------------------------------------------------------------------
        # Create Data Process Definition and Data Process for the eng stream monitor process
        #-------------------------------------------------------------------------------------
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='DemoStreamAlertTransform',
            description='For testing EventTriggeredTransform_B',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='DemoStreamAlertTransform')
        self.platform_dprocdef_id = self.dataprocessclient.create_data_process_definition(
            dpd_obj)

        # THERE SHOULD BE NO STREAMDEF REQUIRED HERE.
        platform_streamdef_id = self.pubsubcli.create_stream_definition(
            name='platform_eng_parsed', parameter_dictionary_id=self.pdict_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            platform_streamdef_id, self.platform_dprocdef_id, binding='output')

        config = {
            'process': {
                'timer_interval': 5,
                'queue_name': 'a_queue',
                'variable_name': 'input_voltage',
                'time_field_name': 'preferred_timestamp',
                'valid_values': [-100, 100],
                'timer_origin': 'Interval Timer'
            }
        }

        # NOTE(review): self.data_product_id is the data product created for
        # the last platform visited by the traversal -- confirm this is the
        # intended input when more than one platform is prepared.
        platform_data_process_id = self.dataprocessclient.create_data_process(
            self.platform_dprocdef_id, [self.data_product_id], {}, config)
        self.dataprocessclient.activate_data_process(platform_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        platform_data_process_id)

        #-------------------------------
        # Launch Base Platform AgentInstance, connect to the resource agent client
        #-------------------------------
        agent_instance_id = base_platform_objs['agent_instance_id']
        log.debug(
            "about to call imsclient.start_platform_agent_instance with id=%s",
            agent_instance_id)
        pid = self.imsclient.start_platform_agent_instance(
            platform_agent_instance_id=agent_instance_id)
        log.debug("start_platform_agent_instance returned pid=%s", pid)

        # wait for start
        instance_obj = self.imsclient.read_platform_agent_instance(
            agent_instance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
                                instance_obj.agent_process_id,
                                ProcessStateEnum.RUNNING)
        # 'await' is a reserved word from Python 3.7 on; looking the method
        # up by name is equivalent and keeps this module parseable there.
        self.assertTrue(
            getattr(gate, 'await')(90),
            "The platform agent instance did not spawn in 90 seconds")

        # NOTE(review): reads a platform agent instance through the
        # instrument-agent-instance call; kept as in the original.
        agent_instance_obj = self.imsclient.read_instrument_agent_instance(
            agent_instance_id)
        log.debug(
            'test_oms_create_and_launch: Platform agent instance obj: %s',
            str(agent_instance_obj))

        # Start a resource agent client to talk with the instrument agent.
        self._pa_client = ResourceAgentClient(
            'paclient',
            name=agent_instance_obj.agent_process_id,
            process=FakeProcess())
        log.debug(" test_oms_create_and_launch:: got pa client %s",
                  str(self._pa_client))

        log.debug("base_platform_config =\n%s", base_platform_config)

        # ping_agent can be issued before INITIALIZE
        retval = self._pa_client.ping_agent(timeout=TIMEOUT)
        log.debug('Base Platform ping_agent = %s', str(retval))

        # issue INITIALIZE command to the base platform, which will launch the
        # creation of the whole platform hierarchy rooted at base_platform_config['platform_id']
        # cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE, kwargs=dict(plat_config=base_platform_config))
        self._execute_base_platform_command(
            PlatformAgentEvent.INITIALIZE, 'INITIALIZE')

        # GO_ACTIVE
        self._execute_base_platform_command(
            PlatformAgentEvent.GO_ACTIVE, 'GO_ACTIVE')

        # RUN:
        self._execute_base_platform_command(
            PlatformAgentEvent.RUN, 'RUN')

        # START_MONITORING:
        self._execute_base_platform_command(
            PlatformAgentEvent.START_MONITORING, 'START_MONITORING')

        # wait for data sample
        # just wait for at least one -- see consume_data above
        log.info("waiting for reception of a data sample...")
        self._async_data_result.get(timeout=DATA_TIMEOUT)
        self.assertTrue(len(self._samples_received) >= 1)

        log.info("waiting a bit more for reception of more data samples...")
        sleep(15)
        log.info("Got data samples: %d", len(self._samples_received))

        # wait for event
        # just wait for at least one event -- see consume_event above
        log.info("waiting for reception of an event...")
        self._async_event_result.get(timeout=EVENT_TIMEOUT)
        log.info("Received events: %s", len(self._events_received))

        # get the extended platform, which will include the platform aggregate
        # status fields
        # NOTE(review): self.device_id is the last device created by the
        # traversal, not necessarily the base platform's device.
        extended_platform = self.imsclient.get_platform_device_extension(
            self.device_id)
        # log.debug( 'test_single_platform   extended_platform: %s', str(extended_platform) )
        # log.debug( 'test_single_platform   power_status_roll_up: %s', str(extended_platform.computed.power_status_roll_up.value) )
        # log.debug( 'test_single_platform   comms_status_roll_up: %s', str(extended_platform.computed.communications_status_roll_up.value) )

        # STOP_MONITORING:
        self._execute_base_platform_command(
            PlatformAgentEvent.STOP_MONITORING, 'STOP_MONITORING')

        # GO_INACTIVE
        self._execute_base_platform_command(
            PlatformAgentEvent.GO_INACTIVE, 'GO_INACTIVE')

        # RESET: Resets the base platform agent, which includes termination of
        # its sub-platforms processes:
        self._execute_base_platform_command(
            PlatformAgentEvent.RESET, 'RESET')

        #-------------------------------
        # Stop Base Platform AgentInstance
        #-------------------------------
        self.imsclient.stop_platform_agent_instance(
            platform_agent_instance_id=agent_instance_id)
class TestDMEnd2End(IonIntegrationTestCase): def setUp(self): # Love the non pep-8 convention self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.process_dispatcher = ProcessDispatcherServiceClient() self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.ingestion_management = IngestionManagementServiceClient() self.data_retriever = DataRetrieverServiceClient() self.event = Event() self.exchange_space_name = 'test_granules' self.exchange_point_name = 'science_data' self.i = 0 self.cci = 0 #-------------------------------------------------------------------------------- # Helper/Utility methods #-------------------------------------------------------------------------------- def create_dataset(self, parameter_dict_id=''): ''' Creates a time-series dataset ''' if not parameter_dict_id: parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) dataset = Dataset('test_dataset_%i'%self.i) dataset_id = self.dataset_management.create_dataset(dataset, parameter_dictionary_id=parameter_dict_id) self.addCleanup(self.dataset_management.delete_dataset, dataset_id) return dataset_id def get_datastore(self, dataset_id): ''' Gets an instance of the datastore This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes delete a CouchDB datastore and the other containers are unaware of the new state of the datastore. 
''' dataset = self.dataset_management.read_dataset(dataset_id) datastore_name = dataset.datastore_name datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA) return datastore def get_ingestion_config(self): ''' Grab the ingestion configuration from the resource registry ''' # The ingestion configuration should have been created by the bootstrap service # which is configured through r2deploy.yml ingest_configs, _ = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True) return ingest_configs[0] def launch_producer(self, stream_id=''): ''' Launch the producer ''' pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}}) self.addCleanup(self.container.terminate_process, pid) def make_simple_dataset(self): ''' Makes a stream, a stream definition and a dataset, the essentials for most of these tests ''' pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('ctd data %i' % self.i, parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) stream_id, route = self.pubsub_management.create_stream('ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id) self.addCleanup(self.pubsub_management.delete_stream, stream_id) dataset_id = self.create_dataset(pdict_id) # self.get_datastore(dataset_id) self.i += 1 return stream_id, route, stream_def_id, dataset_id def publish_hifi(self,stream_id,stream_route,offset=0): ''' Publish deterministic data ''' pub = StandaloneStreamPublisher(stream_id, stream_route) stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_def._id rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(10) + (offset * 10) 
rdt['temp'] = np.arange(10) + (offset * 10) pub.publish(rdt.to_granule()) def publish_fake_data(self,stream_id, route): ''' Make four granules ''' for i in xrange(4): self.publish_hifi(stream_id,route,i) def start_ingestion(self, stream_id, dataset_id): ''' Starts ingestion/persistence for a given dataset ''' ingest_config_id = self.get_ingestion_config() self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) def stop_ingestion(self, stream_id): ingest_config_id = self.get_ingestion_config() self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id) def validate_granule_subscription(self, msg, route, stream_id): ''' Validation for granule format ''' if msg == {}: return rdt = RecordDictionaryTool.load_from_granule(msg) log.info('%s', rdt.pretty_print()) self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg)) self.event.set() def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40): ''' Loops until there is a sufficient amount of data in the dataset ''' done = False with gevent.Timeout(40): while not done: extents = self.dataset_management.dataset_extents(dataset_id, 'time') granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1) rdt = RecordDictionaryTool.load_from_granule(granule) if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size: done = True else: gevent.sleep(0.2) #-------------------------------------------------------------------------------- # Test Methods #-------------------------------------------------------------------------------- def test_dm_end_2_end(self): #-------------------------------------------------------------------------------- # Set up a stream and have a mock instrument (producer) send data #-------------------------------------------------------------------------------- self.event.clear() # 
Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump())) # Add another field that supports dictionary elements. rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) #-------------------------------------------------------------------------------- # Start persisting the data on the stream # - Get the ingestion configuration from the resource registry # - Create the dataset # - call persist_data_stream to setup the subscription for the ingestion workers # on the stream that you specify which causes the data to be persisted #-------------------------------------------------------------------------------- ingest_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) self.addCleanup(self.stop_ingestion, stream_id) #-------------------------------------------------------------------------------- # Now the granules are ingesting and persisted 
#-------------------------------------------------------------------------------- self.launch_producer(stream_id) self.wait_until_we_have_enough_granules(dataset_id,40) #-------------------------------------------------------------------------------- # Now get the data in one chunk using an RPC Call to start_retreive #-------------------------------------------------------------------------------- replay_data = self.data_retriever.retrieve(dataset_id) self.assertIsInstance(replay_data, Granule) rdt = RecordDictionaryTool.load_from_granule(replay_data) self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:]) self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all()) #-------------------------------------------------------------------------------- # Now to try the streamed approach #-------------------------------------------------------------------------------- replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition) self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id) log.info('Process ID: %s', process_id) replay_client = ReplayClient(process_id) #-------------------------------------------------------------------------------- # Create the listening endpoint for the the retriever to talk to #-------------------------------------------------------------------------------- sub_id = self.pubsub_management.create_subscription(self.exchange_space_name,stream_ids=[replay_stream_id]) self.addCleanup(self.pubsub_management.delete_subscription, sub_id) self.pubsub_management.activate_subscription(sub_id) self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id) subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription) subscriber.start() self.addCleanup(subscriber.stop) 
self.data_retriever.start_replay_agent(self.replay_id) self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched') replay_client.start_replay() self.assertTrue(self.event.wait(10)) self.data_retriever.cancel_replay_agent(self.replay_id) #-------------------------------------------------------------------------------- # Test the slicing capabilities #-------------------------------------------------------------------------------- granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)}) rdt = RecordDictionaryTool.load_from_granule(granule) b = rdt['time'] == np.arange(5) self.assertTrue(b.all() if not isinstance(b,bool) else b) def test_coverage_transform(self): ph = ParameterHelper(self.dataset_management, self.addCleanup) pdict_id = ph.create_parsed() stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id) self.addCleanup(self.pubsub_management.delete_stream, stream_id) ingestion_config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id) self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id) publisher = StandaloneStreamPublisher(stream_id, route) rdt = ph.get_rdt(stream_def_id) ph.fill_parsed_rdt(rdt) dataset_monitor = DatasetMonitor(dataset_id) self.addCleanup(dataset_monitor.stop) publisher.publish(rdt.to_granule()) self.assertTrue(dataset_monitor.wait()) replay_granule = self.data_retriever.retrieve(dataset_id) rdt_out = RecordDictionaryTool.load_from_granule(replay_granule) np.testing.assert_array_almost_equal(rdt_out['time'], 
rdt['time']) np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp']) np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914])) np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.])) np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068])) np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32')) np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32')) def test_ingestion_pause(self): ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() ingestion_config_id = self.get_ingestion_config() self.start_ingestion(ctd_stream_id, dataset_id) self.addCleanup(self.stop_ingestion, ctd_stream_id) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(10) publisher = StandaloneStreamPublisher(ctd_stream_id, route) monitor = DatasetMonitor(dataset_id) self.addCleanup(monitor.stop) publisher.publish(rdt.to_granule()) self.assertTrue(monitor.wait()) granule = self.data_retriever.retrieve(dataset_id) self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id) monitor.event.clear() rdt['time'] = np.arange(10,20) publisher.publish(rdt.to_granule()) self.assertFalse(monitor.event.wait(1)) self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id) self.assertTrue(monitor.wait()) granule = self.data_retriever.retrieve(dataset_id) rdt2 = RecordDictionaryTool.load_from_granule(granule) np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20)) def test_last_granule(self): stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(stream_id, dataset_id) self.addCleanup(self.stop_ingestion, stream_id) self.publish_hifi(stream_id,route, 0) self.publish_hifi(stream_id,route, 1) self.wait_until_we_have_enough_granules(dataset_id,20) # I just need two success = False def verifier(): replay_granule = 
self.data_retriever.retrieve_last_data_points(dataset_id, 10) rdt = RecordDictionaryTool.load_from_granule(replay_granule) comp = rdt['time'] == np.arange(10) + 10 if not isinstance(comp,bool): return comp.all() return False success = poll(verifier) self.assertTrue(success) success = False def verify_points(): replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id,5) rdt = RecordDictionaryTool.load_from_granule(replay_granule) comp = rdt['time'] == np.arange(15,20) if not isinstance(comp,bool): return comp.all() return False success = poll(verify_points) self.assertTrue(success) def test_replay_with_parameters(self): #-------------------------------------------------------------------------------- # Create the configurations and the dataset #-------------------------------------------------------------------------------- # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump())) # Add another field that supports dictionary elements. 
rec_context = ParameterContext('records', param_type=RecordType()) context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump())) pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time') stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id) stream_id, route = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id) config_id = self.get_ingestion_config() dataset_id = self.create_dataset(pdict_id) self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id) self.addCleanup(self.stop_ingestion, stream_id) dataset_monitor = DatasetMonitor(dataset_id) self.addCleanup(dataset_monitor.stop) self.publish_fake_data(stream_id, route) self.assertTrue(dataset_monitor.wait()) query = { 'start_time': 0 - 2208988800, 'end_time': 19 - 2208988800, 'stride_time' : 2, 'parameters': ['time','temp'] } retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query) rdt = RecordDictionaryTool.load_from_granule(retrieved_data) np.testing.assert_array_equal(rdt['time'], np.arange(0,20,2)) self.assertEquals(set(rdt.iterkeys()), set(['time','temp'])) extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp']) self.assertTrue(extents['time']>=20) self.assertTrue(extents['temp']>=20) def test_repersist_data(self): stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(stream_id, dataset_id) self.publish_hifi(stream_id,route,0) self.publish_hifi(stream_id,route,1) self.wait_until_we_have_enough_granules(dataset_id,20) config_id = self.get_ingestion_config() self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id) 
self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id) self.addCleanup(self.stop_ingestion, stream_id) self.publish_hifi(stream_id,route,2) self.publish_hifi(stream_id,route,3) self.wait_until_we_have_enough_granules(dataset_id,40) success = False with gevent.timeout.Timeout(5): while not success: replay_granule = self.data_retriever.retrieve(dataset_id) rdt = RecordDictionaryTool.load_from_granule(replay_granule) comp = rdt['time'] == np.arange(0,40) if not isinstance(comp,bool): success = comp.all() gevent.sleep(1) self.assertTrue(success) @unittest.skip('deprecated') def test_correct_time(self): # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e. # the conversion factor between unix and NTP time unix_now = np.floor(time.time()) ntp_now = unix_now + 2208988800 unix_ago = unix_now - 20 ntp_ago = unix_ago + 2208988800 stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a') coverage.insert_timesteps(20) coverage.set_parameter_values('time', np.arange(ntp_ago,ntp_now)) temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id) self.assertTrue( np.abs(temporal_bounds[0] - unix_ago) < 2) self.assertTrue( np.abs(temporal_bounds[1] - unix_now) < 2) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.') def test_out_of_band_retrieve(self): # Setup the environemnt stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset() self.start_ingestion(stream_id, dataset_id) # Fill the dataset self.publish_fake_data(stream_id, route) self.wait_until_we_have_enough_granules(dataset_id,40) # Retrieve the data granule = DataRetrieverService.retrieve_oob(dataset_id) rdt = RecordDictionaryTool.load_from_granule(granule) self.assertTrue((rdt['time'] == np.arange(40)).all()) 
def publish_and_wait(self, dataset_id, granule):
    """Publish ``granule`` on the stream associated with ``dataset_id``.

    The first object found through the ``hasStream`` association of the
    dataset is used as the target stream.  A DatasetMonitor is attached to
    the dataset before publishing, and the test fails unless its ``wait()``
    returns a true value afterwards.
    """
    found_streams, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream, id_only=True)
    target_stream = found_streams[0]
    stream_route = self.pubsub_management.read_stream_route(target_stream)
    granule_publisher = StandaloneStreamPublisher(target_stream, stream_route)

    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    granule_publisher.publish(granule)
    self.assertTrue(monitor.wait())

def test_sparse_values(self):
    """Publish granules built from the sparse parameter dictionary and replay them.

    Sequence exercised:
      1. one granule carrying lat=45 / lon=-71 together with the raw fields;
         the replay is checked against the raw and derived (L1, density,
         salinity) values,
      2. one granule that only sets time and a new lat/lon (46 / -73),
      3. nine further granules that set the raw fields but not lat/lon.
    The final replay must contain ten pressure values, with lat/lon equal to
    45 / -71 for the first record and 46 / -73 for the remaining nine.
    """
    NTP_OFFSET = 2208988800  # added to time.time() for every timestamp below

    def assign(record, pairs):
        # Apply (field, values) pairs in the order given.
        for field_name, field_values in pairs:
            record[field_name] = field_values

    helper = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = helper.create_sparse()

    stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream(
        'example',
        exchange_point=self.exchange_point_name,
        stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    dataset_id = self.create_dataset(pdict_id)
    self.start_ingestion(stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)

    # First granule: carries the sparse values (lat 45, lon -71) alongside
    # the regular raw fields.
    ntp_now = time.time() + NTP_OFFSET
    rdt = helper.get_rdt(stream_def_id)
    assign(rdt, (
        ('time', [ntp_now]),
        ('internal_timestamp', [ntp_now]),
        ('temp', [300000]),
        ('preferred_timestamp', ['driver_timestamp']),
        ('port_timestamp', [ntp_now]),
        ('quality_flag', ['']),
        ('lat', [45]),
        ('conductivity', [4341400]),
        ('driver_timestamp', [ntp_now]),
        ('lon', [-71]),
        ('pressure', [256.8]),
    ))

    publisher = StandaloneStreamPublisher(stream_id, route)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    def push(record):
        # Publish one granule, require the monitor to fire, then re-arm it.
        publisher.publish(record.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

    def replay():
        # Retrieve the whole dataset and load it into a RecordDictionaryTool.
        retrieved = self.data_retriever.retrieve(dataset_id)
        return RecordDictionaryTool.load_from_granule(retrieved)

    push(rdt)
    rdt_out = replay()

    # The replay must match what was published, including derived values.
    np.testing.assert_allclose(rdt_out['time'], rdt['time'])
    np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
    np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
    np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))
    np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
    np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
    np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
    np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
    np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))

    # Second granule: only changes the lat/lon.
    rdt = helper.get_rdt(stream_def_id)
    rdt['time'] = time.time() + NTP_OFFSET
    rdt['lat'] = [46]
    rdt['lon'] = [-73]

    push(rdt)
    rdt_out = replay()
    np.testing.assert_allclose(rdt_out['time'], rdt['time'])

    # Nine more granules reusing the same record object; lat/lon are not
    # assigned again inside the loop.
    for _ in xrange(9):
        ntp_now = time.time() + NTP_OFFSET
        assign(rdt, (
            ('time', [ntp_now]),
            ('internal_timestamp', [ntp_now]),
            ('temp', [300000]),
            ('preferred_timestamp', ['driver_timestamp']),
            ('port_timestamp', [ntp_now]),
            ('quality_flag', [None]),
            ('conductivity', [4341400]),
            ('driver_timestamp', [ntp_now]),
            ('pressure', [256.8]),
        ))
        push(rdt)

    rdt_out = replay()
    np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
    np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
    np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):
    """Integration tests for the data product management service client.

    ``setUp`` starts a container from ``res/deploy/r2deploy.yml``, creates the
    service clients used by the tests and schedules one science granule
    ingestion worker.  ``cleaning_up`` (registered as a cleanup) cancels the
    scheduled processes and deletes the exchange queues / points recorded
    during ``setUp``.
    """

    def setUp(self):
        """Start the container, build the service clients and launch an ingestion worker."""
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------
        # NOTE: test_create_data_product asserts on the number of stream
        # definitions it sees (2), which includes this one.
        self.stream_def_id = self.pubsubcli.create_stream_definition(
            name='SBE37_CDM')

        self.process_definitions = {}
        ingestion_worker_definition = ProcessDefinition(
            name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(
            process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        # Bookkeeping consumed by cleaning_up()
        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        """Best-effort teardown of everything ``setUp`` recorded.

        Cancelling a process is allowed to fail (the failure is only logged)
        so that the remaining cleanup steps still run.
        """
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
                log.debug("Terminated the process: %s", pid)
            except Exception as exc:
                # Deliberately best-effort, but do not swallow
                # KeyboardInterrupt / SystemExit and do record the reason.
                log.debug("could not terminate the process id: %s (%s)", pid, exc)

        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore named by the dataset resource ``dataset_id``."""
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):
        """Create, read, update, extend, prepare-support and delete data products."""
        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data',
            parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product (DP1) with geospatial bounds and a product name
        #------------------------------------------------------------------------------------------------
        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        self.assertNotEquals(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_def_ids, _ = self.rrclient.find_objects(dp_id,
                                                       PRED.hasStreamDefinition,
                                                       RT.StreamDefinition, True)
        self.assertNotEquals(len(stream_def_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        # Bounds are symmetric (+/-10), so the centre latitude is 0.0
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)

        #------------------------------------------------------------------------------------------------
        # test creating a second data product (DP2) with the same stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
                           name='DP2',
                           description='some new dp')
        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s' % dp_id2)

        #------------------------------------------------------------------------------------------------
        # make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream,
                                                RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s" % s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition,
                                                  RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s" % sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEquals(dp_obj.description, 'the very first dp')
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        # test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(
            ComputedValueAvailability.PROVIDED,
            extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(
            0, extended_product.computed.product_download_size_estimated.value)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)

        # test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()
        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_,
                         OT.DataProductPrepareSupport)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].resources),
            2)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].associated_resources),
            0)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].associated_resources),
            0)

        # test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)
        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_,
                         OT.DataProductPrepareSupport)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].resources),
            2)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].resources), 1)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].associated_resources),
            1)
        self.assertEqual(
            data_product_data.associations['StreamDefinition'].associated_resources[0].s,
            dp_id)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].associated_resources),
            1)
        self.assertEqual(
            data_product_data.associations['Dataset'].associated_resources[0].s,
            dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s" % dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEquals(len(stream_ids), 0)
        self.assertEquals(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value
        for event in events:
            log.debug("event time: %s" % event.ts_created)
        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        """The stream definition reported for a data product is the one it was created with."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')
        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEquals(ctd_stream_def_id, stream_def_id)

    def test_derived_data_product(self):
        """Data published to a parent product is retrievable through a derived (TEMPWAT) product.

        The derived product's stream definition only exposes ``time`` and
        ``temp``; the retrieved granule must contain exactly those fields.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition,
                        ctd_stream_def_id)

        dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(
            dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence,
                        dp_id)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT',
                                 category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)

        # Check that the streams associated with the parent data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id,
                                                            PRED.hasDataset,
                                                            id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time', 'temp']))

    def test_activate_suspend_data_product(self):
        """Activate, suspend and re-activate persistence of a data product.

        Verifies that data published while persistence is active reaches the
        dataset, that data published while suspended does not trigger a
        DatasetModified event, and that four InformationContentStatusEvent
        messages for the data product are received overall.
        """
        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # create the data product under test
        #------------------------------------------------------------------------------------------------
        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')
        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent,
                             callback=info_event_received,
                             origin=dp_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug(
            "The data retriever was able to replay the dataset that was attached to the data product "
            "we wanted to be persisted. Therefore the data product was indeed persisted with "
            "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
            "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'"
        )

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name, 'DP1')
        self.assertEquals(data_product_object.description, 'some new dp')

        log.debug(
            "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
            " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
            "resource registry, name='%s', desc='%s'" %
            (dp_obj.name, dp_obj.description, data_product_object.name,
             data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        # While suspended, publishing must NOT produce a DatasetModified event
        dataset_modified.clear()
        rdt['time'] = np.arange(20, 40)
        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        # After re-activation the same granule must be ingested
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                    PRED.hasDataset,
                                                    id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.rrclient.read(dp_id)

        # Check that the four InformationContentStatusEvents were received.
        # The whole collection is bounded by one 60 second deadline, and a
        # missing event ends the loop so that the assertion below reports the
        # shortfall instead of an unhandled queue Empty error.
        expected_events = 4
        deadline = time.time() + 60
        caught_events = []
        while len(caught_events) < expected_events:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            try:
                caught_events.append(queue.get(timeout=remaining))
            except gevent.queue.Empty:
                break

        self.assertEquals(len(caught_events), expected_events)
class TestPreloadThenLoadDataset(IonIntegrationTestCase):
    """
    Uses the preload system to define the ExternalDataset and related
    resources, then invokes services to perform the load
    """

    def setUp(self):
        """Start the container and spawn the IONLoader process with the NOSE scenario."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        loader_config = dict(op="load", scenario="NOSE", attachments="res/preload/r2_ioc/attachments")
        self.container.spawn_process("Loader", "ion.processes.bootstrap.ion_loader", "IONLoader", config=loader_config)

        self.pubsub = PubsubManagementServiceClient()
        self.dams = DataAcquisitionManagementServiceClient()

    @unittest.skip("depricated test, now in mi repo")
    def test_use_case(self):
        """Run the whole scenario: locate resources, listen, load, verify, shut down."""
        # setUp() has already started the container and performed the preload

        # association changed -- the lookup is now done by device name
        self.assert_dataset_loaded('Unit Test SMB37')
        # listen for any data being received from the dataset
        self.do_listen_for_incoming()
        # call services to load dataset
        self.do_read_dataset()
        # check that data was received as expected
        self.assert_data_received()
        self.do_shutdown()

    def assert_dataset_loaded(self, name):
        """Look up the preloaded resources for device ``name`` and store them on ``self``.

        Sets ``device``, ``agent_instance``, ``agent``, ``data_product``,
        ``dataset_id``, ``stream_id`` and ``route``.  Fails unless exactly one
        InstrumentDevice with that name exists.
        """
        registry = self.container.resource_registry

        devices, _ = registry.find_resources(RT.InstrumentDevice, name=name, id_only=False)
        self.assertEquals(len(devices), 1)
        self.device = devices[0]

        instances, _ = registry.find_objects(subject=self.device._id,
                                             predicate=PRED.hasAgentInstance,
                                             object_type=RT.ExternalDatasetAgentInstance)
        self.agent_instance = instances[0]

        agents, _ = registry.find_objects(object_type=RT.ExternalDatasetAgent,
                                          predicate=PRED.hasAgentDefinition,
                                          subject=self.agent_instance._id)
        self.agent = agents[0]

        # Currently unused: it fed a stream-definition lookup that is disabled.
        driver_cfg = self.agent_instance.driver_config

        self.data_product = registry.read_object(subject=self.device._id,
                                                 predicate=PRED.hasOutputProduct,
                                                 object_type=RT.DataProduct)
        self.dataset_id = registry.read_object(subject=self.data_product._id,
                                               predicate=PRED.hasDataset,
                                               object_type=RT.Dataset,
                                               id_only=True)

        stream_ids, _ = registry.find_objects(subject=self.data_product._id,
                                              predicate=PRED.hasStream,
                                              object_type=RT.Stream,
                                              id_only=True)
        self.stream_id = stream_ids[0]
        self.route = self.pubsub.read_stream_route(self.stream_id)

    def do_listen_for_incoming(self):
        """Subscribe to the data product's granules and to DatasetModified events.

        Every granule increments ``granule_count``; only the granules seen
        while the count is below 5 are kept in ``granule_capture``.
        ``dataset_modified`` is set when a DatasetModified event whose origin
        is ``self.dataset_id`` arrives.
        """
        subscription_id = self.pubsub.create_subscription('validator', data_product_ids=[self.data_product._id])
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.granule_capture = []
        self.granule_count = 0

        def on_granule(msg, route, stream_id):
            self.granule_count += 1
            if self.granule_count < 5:
                self.granule_capture.append(msg)

        granule_listener = StandaloneStreamSubscriber('validator', callback=on_granule)
        granule_listener.start()
        self.addCleanup(granule_listener.stop)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        self.dataset_modified = Event()

        def cb2(*args, **kwargs):
            self.dataset_modified.set()

        # TODO: event isn't using the ExternalDataset, but a different ID for a Dataset
        modified_listener = EventSubscriber(event_type=OT.DatasetModified, callback=cb2, origin=self.dataset_id)
        modified_listener.start()
        self.addCleanup(modified_listener.stop)

    def do_read_dataset(self):
        """Start the external dataset agent and drive it into autosampling.

        Retries creating a ResourceAgentClient every 2 seconds (on NotFound)
        until MAX_AGENT_START_TIME seconds have elapsed, then fails the test
        if no client could be created.
        """
        self.dams.start_external_dataset_agent_instance(self.agent_instance._id)

        # Open question kept from the original author: should we wait for the
        # agent process (above) to start before launching the client (below)?
        self.client = None
        deadline = time.time() + MAX_AGENT_START_TIME
        while not self.client and time.time() < deadline:
            try:
                self.client = ResourceAgentClient(self.device._id, process=FakeProcess())
            except NotFound:
                time.sleep(2)

        if not self.client:
            self.fail(msg='external dataset agent process did not start in %d seconds' % MAX_AGENT_START_TIME)

        # Walk the agent through its states in order
        for agent_event in (ResourceAgentEvent.INITIALIZE,
                            ResourceAgentEvent.GO_ACTIVE,
                            ResourceAgentEvent.RUN):
            self.client.execute_agent(AgentCommand(command=agent_event))
        self.client.execute_resource(command=AgentCommand(command=DriverEvent.START_AUTOSAMPLE))

    def assert_data_received(self):
        """Check that more than two granules arrived and spot-check the first one.

        Waits up to 30 seconds for the DatasetModified event if it has not
        been seen yet.
        """
        if not self.dataset_modified.is_set():
            self.dataset_modified.wait(30)
        self.assertTrue(self.granule_count > 2, msg='granule count = %d'%self.granule_count)

        rdt = RecordDictionaryTool.load_from_granule(self.granule_capture[0])
        # (expected first value, field name, tolerance)
        expectations = (
            (0, 'oxygen', 0.01),
            (309.77, 'pressure', 0.01),
            (37.9848, 'conductivity', 0.01),
            (9.5163, 'temp', 0.01),
            (3527207897.0, 'time', 1),
        )
        for expected, field, tolerance in expectations:
            self.assertAlmostEqual(expected, rdt[field][0], delta=tolerance)

    def do_shutdown(self):
        """Stop the external dataset agent instance started by do_read_dataset."""
        self.dams.stop_external_dataset_agent_instance(self.agent_instance._id)
class ExhaustiveParameterTest(IonIntegrationTestCase):
    """Write to and read back every ParameterDictionary found in the registry.

    ``setUp`` creates one stream definition and one persisted data product
    per ParameterDictionary resource; ``test_data_products`` then publishes
    generated values to each product and compares them with what is retrieved.
    """

    def setUp(self):
        """Start the container (r2params deployment) and create a data product per parameter dictionary."""
        self.i=0
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2params.yml')

        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.resource_registry = self.container.resource_registry
        self.data_retriever = DataRetrieverServiceClient()

        pdicts, _ = self.resource_registry.find_resources(restype='ParameterDictionary', id_only=False)
        self.dp_ids = []
        for pdict in pdicts:
            stream_def_id = self.pubsub_management.create_stream_definition(pdict.name, parameter_dictionary_id=pdict._id)
            dp_id = self.make_dp(stream_def_id)
            if dp_id:
                self.dp_ids.append(dp_id)

    def make_dp(self, stream_def_id):
        """Create a data product named after the stream definition, activate persistence, return its id."""
        stream_def = self.resource_registry.read(stream_def_id)
        dp_obj = DataProduct(
            name=stream_def.name,
            description=stream_def.name,
            processing_level_code='Parsed_Canonical')

        data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(data_product_id)
        return data_product_id

    def fill_values(self, ptype, size):
        """Return sample values for a parameter of type ``ptype``.

        Array, quantity and record types get ``size`` values; constant-range,
        constant and category types get a single value.  Any other type
        yields ``None``.
        """
        if isinstance(ptype, ArrayType):
            return ['blah'] * size
        elif isinstance(ptype, QuantityType):
            return np.sin(np.arange(size, dtype=ptype.value_encoding) * 2 * np.pi / 3)
        elif isinstance(ptype, RecordType):
            return [{'record': 'ok'}] * size
        elif isinstance(ptype, ConstantRangeType):
            return (1,1000)
        elif isinstance(ptype, ConstantType):
            return np.dtype(ptype.value_encoding).type(1)
        elif isinstance(ptype, CategoryType):
            return ptype.categories.keys()[0]
        else:
            return

    def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset

        Polls every 0.2 seconds.  Polling stops once the last data point has a
        temporal value that differs from the parameter's fill value and the
        temporal extent of the dataset is at least ``data_size``.  A
        gevent.Timeout is raised if that does not happen within 40 seconds.
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                time_name = rdt._pdict.temporal_parameter_name
                extents = self.dataset_management.dataset_extents(dataset_id, time_name)[0]
                if rdt[time_name] and rdt[time_name][0] != rdt._pdict.get_context(time_name).fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)

    def write_to_data_product(self,data_product_id):
        """Publish 40 generated records to the data product and compare the replay.

        Returns the list of field names whose retrieved values differ from
        the published ones, or ``None`` (after printing a note) when the
        parameter dictionary has no temporal parameter.
        """
        dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids , _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            # Single-argument print(...) behaves the same under Python 2 and 3.
            print('%s has no temporal parameter' % self.resource_registry.read(data_product_id).name)
            return
        rdt[time_param] = np.arange(40)

        for field in rdt.fields:
            if field == rdt._pdict.temporal_parameter_name:
                continue
            rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type,40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id,40)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        bad = []
        for field in rdt.fields:
            if not np.array_equal(rdt[field], rdt_out[field]):
                print('%s' % field)
                print('%s != %s' % (rdt[field], rdt_out[field]))
                bad.append(field)

        return bad

    def test_data_products(self):
        """Run write_to_data_product for every product and fail if any had a problem.

        Problems (mismatching fields or a traceback) are collected per data
        product and printed before the final AssertionError so that one bad
        parameter dictionary does not hide the others.
        """
        bad_data_products = {}
        for dp_id in self.dp_ids:
            try:
                bad_fields = self.write_to_data_product(dp_id)
                if bad_fields:
                    bad_data_products[dp_id] = "Couldn't write and retrieve %s." % bad_fields
            except (Exception, gevent.Timeout):
                # gevent.Timeout is listed explicitly so the polling timeout in
                # wait_until_we_have_enough_granules is still recorded per
                # product even if it does not derive from Exception, while
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                import traceback
                bad_data_products[dp_id] = traceback.format_exc()

        for dp_id, tb in bad_data_products.iteritems():
            print('----------')
            print('Problem with %s' % self.resource_registry.read(dp_id).name)
            print(tb)
            print('----------')

        if bad_data_products:
            raise AssertionError('There are bad parameter dictionaries.')
class BulkIngestBase(object):
    """
    Shared flow for bulk external-dataset ingest tests.

    The control flow is not obvious, so here it is spelled out:

      test_data_ingest  creates the resources and then calls...
      start_agent       spawns and drives the agent, then calls...
      start_listener    starts the event listeners, one of which calls...
      get_retrieve_client  on each DatasetModified event; this is where the
                           subclass asserts that data was read.

    Subclasses implement the data-specific methods (build_param_contexts,
    create_external_dataset, get_dvr_config, get_retrieve_client); this class
    also calls self.setup_resources() and reads self.name, self.description,
    self.EDA_NAME, self.EDA_MOD and self.EDA_CLS, none of which it defines.

    See TestPreloadThenLoadDataset for a replacement that is a little more
    declarative and straightforward, but much slower (requires preload).
    """

    def setUp(self):
        """Start the container, create the service clients and prepare subclass resources."""
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        container = self.container
        self.pubsub_management = PubsubManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(node=container.node)
        self.resource_registry = container.resource_registry

        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    # ------------------------------------------------------------------
    # Hooks that every concrete test class has to provide
    # ------------------------------------------------------------------

    def build_param_contexts(self):
        """Subclass hook; its result is stored as self.context_ids by setUp."""
        raise NotImplementedError("build_param_contexts must be implemented in child classes")

    def create_external_dataset(self):
        """Subclass hook; its result is stored as self.external_dataset_id."""
        raise NotImplementedError("create_external_dataset must be implemented in child classes")

    def get_dvr_config(self):
        """Subclass hook; its result becomes the agent's "driver_config"."""
        raise NotImplementedError("get_dvr_config must be implemented in child classes")

    def get_retrieve_client(self, dataset_id=""):
        """Subclass hook; invoked for each DatasetModified event on the dataset."""
        raise NotImplementedError("get_retrieve_client must be implemented in child classes")

    # ------------------------------------------------------------------
    # The test itself
    # ------------------------------------------------------------------

    def test_data_ingest(self):
        """Create all resources for the dataset, then start the agent."""
        name = self.name
        self.pdict_id = self.create_parameter_dict(name)
        self.stream_def_id = self.create_stream_def(name, self.pdict_id)
        self.data_product_id = self.create_data_product(name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)

        stream_and_route = self.get_stream_id_and_route(self.data_product_id)
        self.stream_id, self.route = stream_and_route

        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(self.external_dataset_id)

        self.start_agent()

    # ------------------------------------------------------------------
    # Resource helpers
    # ------------------------------------------------------------------

    def create_parameter_dict(self, name=""):
        """Create a parameter dictionary from self.context_ids, with "time" as temporal context."""
        pdict_id = self.dataset_management.create_parameter_dictionary(
            name=name,
            parameter_context_ids=self.context_ids,
            temporal_context="time",
        )
        return pdict_id

    def create_stream_def(self, name="", pdict_id=""):
        """Create a stream definition for the given parameter dictionary and return its id."""
        stream_def_id = self.pubsub_management.create_stream_definition(
            name=name, parameter_dictionary_id=pdict_id
        )
        return stream_def_id

    def create_data_product(self, name="", description="", stream_def_id=""):
        """Create a data product, activate its persistence and return its id."""
        temporal, spatial = time_series_domain()
        product = DataProduct(
            name=name,
            description=description,
            processing_level_code="Parsed_Canonical",
            temporal_domain=temporal.dump(),
            spatial_domain=spatial.dump(),
        )

        dpms = self.data_product_management
        data_product_id = dpms.create_data_product(data_product=product, stream_definition_id=stream_def_id)
        dpms.activate_data_product_persistence(data_product_id)
        return data_product_id

    def register_external_dataset(self, external_dataset_id=""):
        """Register the external dataset with data acquisition management."""
        dams = self.data_acquisition_management
        return dams.register_external_data_set(external_dataset_id=external_dataset_id)

    def get_dataset_id(self, data_product_id=""):
        """Return the id of the first dataset associated with the data product."""
        dataset_ids, _ = self.resource_registry.find_objects(
            subject=data_product_id, predicate="hasDataset", id_only=True
        )
        return dataset_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        """Return (stream_id, route) for the first stream of the data product."""
        stream_ids, _ = self.resource_registry.find_objects(data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        first_stream_id = stream_ids[0]
        # A granule logger could be attached here with:
        #   self.create_logger(self.name, first_stream_id)
        return first_stream_id, self.pubsub_management.read_stream_route(first_stream_id)

    # ------------------------------------------------------------------
    # Agent life cycle
    # ------------------------------------------------------------------

    def start_agent(self):
        """Spawn the dataset agent, drive it to autosample and start listening."""
        agent_config = {
            "driver_config": self.get_dvr_config(),
            "stream_config": {},
            "agent": {"resource_id": self.external_dataset_id},
            "test_mode": True,
        }

        self._ia_pid = self.container.spawn_process(
            name=self.EDA_NAME, module=self.EDA_MOD, cls=self.EDA_CLS, config=agent_config
        )
        self._ia_client = ResourceAgentClient(self.external_dataset_id, process=FakeProcess())

        self._ia_client.execute_agent(AgentCommand(command=ResourceAgentEvent.INITIALIZE))
        self._ia_client.execute_agent(AgentCommand(command=ResourceAgentEvent.GO_ACTIVE))
        self._ia_client.execute_agent(AgentCommand(command=ResourceAgentEvent.RUN))

        autosample = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=autosample)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        """Stop autosampling, reset the agent and terminate its process."""
        client = self._ia_client
        client.execute_resource(AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE))
        client.execute_agent(AgentCommand(command=ResourceAgentEvent.RESET))
        self.container.terminate_process(self._ia_pid)

    def start_listener(self, dataset_id=""):
        """Subscribe to ingest events, wait for the acquisition event, then stop the agent."""
        ingest_finished = Event()

        def on_dataset_modified(*args, **kwargs):
            # use retrieve to get data from the coverage
            self.get_retrieve_client(dataset_id=dataset_id)

        def on_acquisition_done(*args, **kwargs):
            # keep execution going once the dataset has been fully ingested
            ingest_finished.set()

        modified_sub = EventSubscriber(event_type=OT.DatasetModified, callback=on_dataset_modified, origin=dataset_id)
        modified_sub.start()

        lifecycle_sub = EventSubscriber(
            event_type=OT.DeviceCommonLifecycleEvent,
            callback=on_acquisition_done,
            origin="BaseDataHandler._acquire_sample",
        )
        lifecycle_sub.start()

        self.addCleanup(modified_sub.stop)
        self.addCleanup(lifecycle_sub.stop)

        # let it go for up to 120 seconds, then stop the agent and reset it
        ingest_finished.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=""):
        """Schedule a StreamGranuleLogger process for the stream and return its pid."""
        logger_definition = ProcessDefinition(name=name + "_logger")
        logger_definition.executable = {
            "module": "ion.processes.data.stream_granule_logger",
            "class": "StreamGranuleLogger",
        }

        dispatcher = self.process_dispatch_client
        procdef_id = dispatcher.create_process_definition(process_definition=logger_definition)

        return dispatcher.schedule_process(
            process_definition_id=procdef_id,
            configuration={"process": {"stream_id": stream_id}},
        )
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    """
    Integration test for the CTDBP L0 transform: publishes a generated parsed
    granule on the transform's input stream and waits for a granule on its
    output stream.
    """

    def setUp(self):
        """Start the container and create the service clients used by the test."""
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

    def _get_new_ctd_packet(self, stream_definition_id, length):
        """
        Build a granule of `length` records for the given stream definition.

        'time' continues from where the previous packet stopped (self.i);
        every other QuantityType field is filled with random values drawn
        from [0.0, 75.0].

        @param stream_definition_id  stream definition the granule conforms to
        @param length                number of records in the granule
        @retval the granule produced by RecordDictionaryTool.to_granule()
        """
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            # 'time' is a QuantityType too; skip it so the increasing
            # timestamps set above are not replaced by random numbers.
            if field == 'time':
                continue
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0, 75.0) for _ in range(length)])

        granule = rdt.to_granule()
        self.i += length
        return granule

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):
        """
        Create a parameter dictionary with 'time' plus five float32 CTD
        quantities and register it (and its contexts) with dataset management.

        Deletion of every created context and of the dictionary is registered
        as a test cleanup.

        @param parameter_dict_name  name given to the new parameter dictionary
        @retval id of the created parameter dictionary
        """
        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        # All remaining parameters share the same type and an empty unit.
        for name in ('conductivity', 'pressure', 'temperature', 'density', 'salinity'):
            ctxt = ParameterContext(name, param_type=QuantityType(value_encoding=numpy.dtype('float32')))
            ctxt.uom = ''
            pdict.add_context(ctxt)

        # create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description='Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        input_param_dict_id = self._create_input_param_dict_for_test(parameter_dict_name='fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
        input_stream_def_id = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=input_param_dict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, input_stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", input_param_dict_id)
        log.debug("Got the stream def for parsed input: %s", input_stream_def_id)

        # Input data product
        parsed_stream_dp_obj = IonObject(RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        input_dp_id = self.dataproduct_management.create_data_product(
            data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product (shares the input's stream definition)
        L0_stream_dp_obj = IonObject(RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(
            data_product=L0_stream_dp_obj,
            stream_definition_id=input_stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product, L0_stream_dp_id)

        # We need the key name here to be "L0_stream", since when the data process is launched, this name goes into
        # the config as in config.process.publish_streams.L0_stream when the config is used to launch the data process
        self.output_products = {'L0_stream': L0_stream_dp_id}
        out_stream_ids, _ = self.resource_registry.find_objects(L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process(dprocdef_id, [input_dp_id], self.output_products)
        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
            stream_ids=[output_stream_id],
            exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(stream_definition_id=input_stream_def_id, length=5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)

    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties.

        Currently a placeholder: no checks are performed.
        """
        pass
class TestPreloadThenLoadDataset(IonIntegrationTestCase):
    """
    replicates the TestHypm_WPF_CTD test (same handler/parser/data file)
    but uses the preload system to define the ExternalDataset and related resources,
    then invokes services to perform the load
    """

    def setUp(self):
        """Start the container, run the preload and create the service clients."""
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        config = dict(op="load", scenario="BETA,NOSE", attachments="res/preload/r2_ioc/attachments")
        self.container.spawn_process("Loader", "ion.processes.bootstrap.ion_loader", "IONLoader", config=config)

        self.pubsub = PubsubManagementServiceClient()
        self.dams = DataAcquisitionManagementServiceClient()

    def find_object_by_name(self, name, resource_type):
        """
        Return the single resource of `resource_type` whose name is `name`.

        Fails the test when no resource of that type exists or when the
        number of resources with that name is not exactly one.
        """
        objects, _ = self.container.resource_registry.find_resources(resource_type)
        self.assertTrue(len(objects) >= 1)

        filtered_objs = [obj for obj in objects if obj.name == name]
        self.assertEqual(len(filtered_objs), 1,
                         msg='Found %d objects with name %s' % (len(filtered_objs), name))
        return filtered_objs[0]

    def test_use_case(self):
        """Load the preloaded dataset through the agent and check the received data."""
        # setUp() has already started the container and performed the preload

        # association changed -- now use device name
        # (previously looked up by dataset name 'Test External CTD Dataset')
        self.assert_dataset_loaded('Unit Test SMB37')
        self.do_listen_for_incoming()       # listen for any data being received from the dataset
        self.do_read_dataset()              # call services to load dataset
        try:
            self.assert_data_received()     # check that data was received as expected
        finally:
            # stop the agent instance even when the assertions above fail
            self.do_shutdown()

    def assert_dataset_loaded(self, name):
        """
        Look up the preloaded resources hanging off the device called `name`
        and keep them on self: device, agent_instance, agent,
        stream_definition, data_product, stream_id and route.
        """
        # resources are associated with the InstrumentDevice, not an ExternalDataset
        self.device = self.find_object_by_name(name, RT.InstrumentDevice)

        rr = self.container.resource_registry

        obj, _ = rr.find_objects(subject=self.device._id,
                                 predicate=PRED.hasAgentInstance,
                                 object_type=RT.ExternalDatasetAgentInstance)
        self.agent_instance = obj[0]

        obj, _ = rr.find_objects(object_type=RT.ExternalDatasetAgent,
                                 predicate=PRED.hasAgentDefinition,
                                 subject=self.agent_instance._id)
        self.agent = obj[0]

        # The stream definition id sits under 'dh_cfg' when that section
        # exists, otherwise at the top level of the driver config.
        driver_config = self.agent_instance.dataset_driver_config
        if 'dh_cfg' in driver_config:
            stream_definition_id = driver_config['dh_cfg']['stream_def']
        else:
            stream_definition_id = driver_config['stream_def']
        self.stream_definition = rr.read(stream_definition_id)

        self.data_product = rr.read_object(object_type=RT.DataProduct,
                                           predicate=PRED.hasOutputProduct,
                                           subject=self.device._id)

        ids, _ = rr.find_objects(self.data_product._id, PRED.hasStream, RT.Stream, id_only=True)
        self.stream_id = ids[0]
        self.route = self.pubsub.read_stream_route(self.stream_id)

    def do_listen_for_incoming(self):
        """
        Subscribe to the data product's granules and to DatasetModified events.

        Every granule increments self.granule_count; only the first four are
        kept in self.granule_capture.
        """
        subscription_id = self.pubsub.create_subscription('validator', data_product_ids=[self.data_product._id])
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.granule_capture = []
        self.granule_count = 0

        def on_granule(msg, route, stream_id):
            self.granule_count += 1
            if self.granule_count < 5:
                self.granule_capture.append(msg)

        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        self.dataset_modified = Event()

        def cb2(*args, **kwargs):
            self.dataset_modified.set()

        # TODO: event isn't using the ExternalDataset, but a different ID for a Dataset
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb2, origin=self.device._id)
        es.start()
        self.addCleanup(es.stop)

    def do_read_dataset(self):
        """Start the dataset agent instance and drive it into autosample."""
        self.dams.start_external_dataset_agent_instance(self.agent_instance._id)
        #
        # should i wait for process (above) to start
        # before launching client (below)?
        #
        self.client = ResourceAgentClient(self.device._id, process=FakeProcess())
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.INITIALIZE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.GO_ACTIVE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.RUN))
        self.client.execute_resource(command=AgentCommand(command=DriverEvent.START_AUTOSAMPLE))

    def assert_data_received(self):
        """Wait for the dataset-modified event and check the first captured granule."""
        # give ingestion up to 30 seconds; wait() returns at once if the
        # event has already been set
        self.dataset_modified.wait(30)

        self.assertTrue(self.granule_count > 2, msg='granule count = %d' % self.granule_count)

        rdt = RecordDictionaryTool.load_from_granule(self.granule_capture[0])
        self.assertAlmostEqual(0, rdt['oxygen'][0], delta=0.01)
        self.assertAlmostEqual(309.77, rdt['pressure'][0], delta=0.01)
        self.assertAlmostEqual(37.9848, rdt['conductivity'][0], delta=0.01)
        self.assertAlmostEqual(9.5163, rdt['temp'][0], delta=0.01)
        self.assertAlmostEqual(1318219097, rdt['time'][0], delta=1)

    def do_shutdown(self):
        """Stop the external dataset agent instance started by do_read_dataset."""
        self.dams.stop_external_dataset_agent_instance(self.agent_instance._id)
class TestOmsLaunch(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.omsclient = ObservatoryManagementServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.dpclient = DataProductManagementServiceClient(node=self.container.node) self.pubsubcli = PubsubManagementServiceClient(node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node) self.dataset_management = DatasetManagementServiceClient() self.platformModel_id = None # rsn_oms: to retrieve network structure and information from RSN-OMS: # Note that OmsClientFactory will create an "embedded" RSN OMS # simulator object by default. self.rsn_oms = OmsClientFactory.create_instance() self.all_platforms = {} self.topology = {} self.agent_device_map = {} self.agent_streamconfig_map = {} self._async_data_result = AsyncResult() self._data_subscribers = [] self._samples_received = [] self.addCleanup(self._stop_data_subscribers) self._async_event_result = AsyncResult() self._event_subscribers = [] self._events_received = [] self.addCleanup(self._stop_event_subscribers) self._start_event_subscriber() self._set_up_DataProduct_obj() self._set_up_PlatformModel_obj() def _set_up_DataProduct_obj(self): # Create data product object to be used for each of the platform log streams tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() pdict_id = self.dataset_management.read_parameter_dictionary_by_name('platform_eng_parsed', id_only=True) self.platform_eng_stream_def_id = self.pubsubcli.create_stream_definition( name='platform_eng', parameter_dictionary_id=pdict_id) self.dp_obj = IonObject(RT.DataProduct, name='platform_eng data', description='platform_eng test', 
temporal_domain = tdom, spatial_domain = sdom) def _set_up_PlatformModel_obj(self): # Create PlatformModel platformModel_obj = IonObject(RT.PlatformModel, name='RSNPlatformModel', description="RSNPlatformModel") try: self.platformModel_id = self.imsclient.create_platform_model(platformModel_obj) except BadRequest as ex: self.fail("failed to create new PLatformModel: %s" %ex) log.debug( 'new PlatformModel id = %s', self.platformModel_id) def _traverse(self, platform_id, parent_platform_objs=None): """ Recursive routine that repeatedly calls _prepare_platform to build the object dictionary for each platform. @param platform_id ID of the platform to be visited @param parent_platform_objs dict of objects associated to parent platform, if any. @retval the dict returned by _prepare_platform at this level. """ log.info("Starting _traverse for %r", platform_id) plat_objs = self._prepare_platform(platform_id, parent_platform_objs) self.all_platforms[platform_id] = plat_objs # now, traverse the children: retval = self.rsn_oms.getSubplatformIDs(platform_id) subplatform_ids = retval[platform_id] for subplatform_id in subplatform_ids: self._traverse(subplatform_id, plat_objs) # note, topology indexed by platform_id self.topology[platform_id] = plat_objs['children'] return plat_objs def _prepare_platform(self, platform_id, parent_platform_objs): """ This routine generalizes the manual construction currently done in test_oms_launch.py. It is called by the recursive _traverse method so all platforms starting from a given base platform are prepared. Note: For simplicity in this test, sites are organized in the same hierarchical way as the platforms themselves. @param platform_id ID of the platform to be visited @param parent_platform_objs dict of objects associated to parent platform, if any. 
@retval a dict of associated objects similar to those in test_oms_launch """ site__obj = IonObject(RT.PlatformSite, name='%s_PlatformSite' % platform_id, description='%s_PlatformSite platform site' % platform_id) site_id = self.omsclient.create_platform_site(site__obj) if parent_platform_objs: # establish hasSite association with the parent self.rrclient.create_association( subject=parent_platform_objs['site_id'], predicate=PRED.hasSite, object=site_id) # prepare platform attributes and ports: monitor_attributes = self._prepare_platform_attributes(platform_id) ports = self._prepare_platform_ports(platform_id) device__obj = IonObject(RT.PlatformDevice, name='%s_PlatformDevice' % platform_id, description='%s_PlatformDevice platform device' % platform_id, ports=ports, platform_monitor_attributes = monitor_attributes) device_id = self.imsclient.create_platform_device(device__obj) self.imsclient.assign_platform_model_to_platform_device(self.platformModel_id, device_id) self.rrclient.create_association(subject=site_id, predicate=PRED.hasDevice, object=device_id) self.damsclient.register_instrument(instrument_id=device_id) if parent_platform_objs: # establish hasDevice association with the parent self.rrclient.create_association( subject=parent_platform_objs['device_id'], predicate=PRED.hasDevice, object=device_id) agent__obj = IonObject(RT.PlatformAgent, name='%s_PlatformAgent' % platform_id, description='%s_PlatformAgent platform agent' % platform_id) agent_id = self.imsclient.create_platform_agent(agent__obj) if parent_platform_objs: # add this platform_id to parent's children: parent_platform_objs['children'].append(platform_id) self.imsclient.assign_platform_model_to_platform_agent(self.platformModel_id, agent_id) # agent_instance_obj = IonObject(RT.PlatformAgentInstance, # name='%s_PlatformAgentInstance' % platform_id, # description="%s_PlatformAgentInstance" % platform_id) # # agent_instance_id = self.imsclient.create_platform_agent_instance( # agent_instance_obj, 
agent_id, device_id) plat_objs = { 'platform_id': platform_id, 'site__obj': site__obj, 'site_id': site_id, 'device__obj': device__obj, 'device_id': device_id, 'agent__obj': agent__obj, 'agent_id': agent_id, # 'agent_instance_obj': agent_instance_obj, # 'agent_instance_id': agent_instance_id, 'children': [] } log.info("plat_objs for platform_id %r = %s", platform_id, str(plat_objs)) self.agent_device_map[platform_id] = device__obj stream_config = self._create_stream_config(plat_objs) self.agent_streamconfig_map[platform_id] = stream_config # self._start_data_subscriber(agent_instance_id, stream_config) return plat_objs def _prepare_platform_attributes(self, platform_id): """ Returns the list of PlatformMonitorAttributes objects corresponding to the attributes associated to the given platform. """ result = self.rsn_oms.getPlatformAttributes(platform_id) self.assertTrue(platform_id in result) ret_infos = result[platform_id] monitor_attributes = [] for attrName, attrDfn in ret_infos.iteritems(): log.debug("platform_id=%r: preparing attribute=%r", platform_id, attrName) monitor_rate = attrDfn['monitorCycleSeconds'] units = attrDfn['units'] plat_attr_obj = IonObject(OT.PlatformMonitorAttributes, id=attrName, monitor_rate=monitor_rate, units=units) monitor_attributes.append(plat_attr_obj) return monitor_attributes def _prepare_platform_ports(self, platform_id): """ Returns the list of PlatformPort objects corresponding to the ports associated to the given platform. 
""" result = self.rsn_oms.getPlatformPorts(platform_id) self.assertTrue(platform_id in result) port_dict = result[platform_id] ports = [] for port_id, port in port_dict.iteritems(): log.debug("platform_id=%r: preparing port=%r", platform_id, port_id) ip_address = port['comms']['ip'] plat_port_obj = IonObject(OT.PlatformPort, port_id=port_id, ip_address=ip_address) ports.append(plat_port_obj) return ports def _create_stream_config(self, plat_objs): platform_id = plat_objs['platform_id'] device_id = plat_objs['device_id'] #create the log data product self.dp_obj.name = '%s platform_eng data' % platform_id data_product_id = self.dpclient.create_data_product(data_product=self.dp_obj, stream_definition_id=self.platform_eng_stream_def_id) self.damsclient.assign_data_product(input_resource_id=device_id, data_product_id=data_product_id) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasStream, None, True) stream_config = self._build_stream_config(stream_ids[0]) return stream_config def _build_stream_config(self, stream_id=''): platform_eng_dictionary = DatasetManagementService.get_parameter_dictionary_by_name('platform_eng_parsed') #get the streamroute object from pubsub by passing the stream_id stream_def_ids, _ = self.rrclient.find_objects(stream_id, PRED.hasStreamDefinition, RT.StreamDefinition, True) stream_route = self.pubsubcli.read_stream_route(stream_id=stream_id) stream_config = {'routing_key' : stream_route.routing_key, 'stream_id' : stream_id, 'stream_definition_ref' : stream_def_ids[0], 'exchange_point' : stream_route.exchange_point, 'parameter_dictionary':platform_eng_dictionary.dump()} return stream_config def _set_platform_agent_instances(self): """ Once most of the objs/defs associated with all platforms are in place, this method creates and associates the PlatformAgentInstance elements. 
""" self.platform_configs = {} for platform_id, plat_objs in self.all_platforms.iteritems(): PLATFORM_CONFIG = self.platform_configs[platform_id] = { 'platform_id': platform_id, 'platform_topology': self.topology, # 'agent_device_map': self.agent_device_map, 'agent_streamconfig_map': self.agent_streamconfig_map, 'driver_config': DVR_CONFIG, } agent_config = { 'platform_config': PLATFORM_CONFIG, } agent_instance_obj = IonObject(RT.PlatformAgentInstance, name='%s_PlatformAgentInstance' % platform_id, description="%s_PlatformAgentInstance" % platform_id, agent_config=agent_config) agent_id = plat_objs['agent_id'] device_id = plat_objs['device_id'] agent_instance_id = self.imsclient.create_platform_agent_instance( agent_instance_obj, agent_id, device_id) plat_objs['agent_instance_obj'] = agent_instance_obj plat_objs['agent_instance_id'] = agent_instance_id stream_config = self.agent_streamconfig_map[platform_id] self._start_data_subscriber(agent_instance_id, stream_config) def _start_data_subscriber(self, stream_name, stream_config): """ Starts data subscriber for the given stream_name and stream_config """ def consume_data(message, stream_route, stream_id): # A callback for processing subscribed-to data. log.info('Subscriber received data message: %s.', str(message)) self._samples_received.append(message) self._async_data_result.set() log.info('_start_data_subscriber stream_name=%r', stream_name) stream_id = stream_config['stream_id'] # Create subscription for the stream exchange_name = '%s_queue' % stream_name self.container.ex_manager.create_xn_queue(exchange_name).purge() sub = StandaloneStreamSubscriber(exchange_name, consume_data) sub.start() self._data_subscribers.append(sub) sub_id = self.pubsubcli.create_subscription(name=exchange_name, stream_ids=[stream_id]) self.pubsubcli.activate_subscription(sub_id) sub.subscription_id = sub_id def _stop_data_subscribers(self): """ Stop the data subscribers on cleanup. 
""" try: for sub in self._data_subscribers: if hasattr(sub, 'subscription_id'): try: self.pubsubcli.deactivate_subscription(sub.subscription_id) except: pass self.pubsubcli.delete_subscription(sub.subscription_id) sub.stop() finally: self._data_subscribers = [] def _start_event_subscriber(self, event_type="PlatformAlarmEvent", sub_type="power"): """ Starts event subscriber for events of given event_type ("PlatformAlarmEvent" by default) and given sub_type ("power" by default). """ # TODO note: ion-definitions still using 'PlatformAlarmEvent' but we # should probably define 'PlatformExternalEvent' or something like that. def consume_event(evt, *args, **kwargs): # A callback for consuming events. log.info('Event subscriber received evt: %s.', str(evt)) self._events_received.append(evt) self._async_event_result.set(evt) sub = EventSubscriber(event_type=event_type, sub_type=sub_type, callback=consume_event) sub.start() log.info("registered event subscriber for event_type=%r, sub_type=%r", event_type, sub_type) self._event_subscribers.append(sub) sub._ready_event.wait(timeout=EVENT_TIMEOUT) def _stop_event_subscribers(self): """ Stops the event subscribers on cleanup. 
""" try: for sub in self._event_subscribers: if hasattr(sub, 'subscription_id'): try: self.pubsubcli.deactivate_subscription(sub.subscription_id) except: pass self.pubsubcli.delete_subscription(sub.subscription_id) sub.stop() finally: self._event_subscribers = [] def test_oms_create_and_launch(self): # pick a base platform: base_platform_id = BASE_PLATFORM_ID # and trigger the traversal of the branch rooted at that base platform # to create corresponding ION objects and configuration dictionaries: base_platform_objs = self._traverse(base_platform_id) # now that most of the topology information is there, add the # PlatformAgentInstance elements self._set_platform_agent_instances() base_platform_config = self.platform_configs[base_platform_id] log.info("base_platform_id = %r", base_platform_id) log.info("topology = %s", str(self.topology)) #------------------------------- # Launch Base Platform AgentInstance, connect to the resource agent client #------------------------------- agent_instance_id = base_platform_objs['agent_instance_id'] pid = self.imsclient.start_platform_agent_instance(platform_agent_instance_id=agent_instance_id) log.debug("start_platform_agent_instance returned pid=%s", pid) #wait for start instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id) gate = ProcessStateGate(self.processdispatchclient.read_process, instance_obj.agent_process_id, ProcessStateEnum.RUNNING) self.assertTrue(gate.await(90), "The platform agent instance did not spawn in 90 seconds") agent_instance_obj= self.imsclient.read_instrument_agent_instance(agent_instance_id) log.debug('test_oms_create_and_launch: Platform agent instance obj: %s', str(agent_instance_obj)) # Start a resource agent client to talk with the instrument agent. 
self._pa_client = ResourceAgentClient('paclient', name=agent_instance_obj.agent_process_id, process=FakeProcess()) log.debug(" test_oms_create_and_launch:: got pa client %s", str(self._pa_client)) log.debug("base_platform_config =\n%s", base_platform_config) # ping_agent can be issued before INITIALIZE retval = self._pa_client.ping_agent(timeout=TIMEOUT) log.debug( 'Base Platform ping_agent = %s', str(retval) ) # issue INITIALIZE command to the base platform, which will launch the # creation of the whole platform hierarchy rooted at base_platform_config['platform_id'] # cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE, kwargs=dict(plat_config=base_platform_config)) cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform INITIALIZE = %s', str(retval) ) # GO_ACTIVE cmd = AgentCommand(command=PlatformAgentEvent.GO_ACTIVE) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform GO_ACTIVE = %s', str(retval) ) # RUN: this command includes the launch of the resource monitoring greenlets cmd = AgentCommand(command=PlatformAgentEvent.RUN) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform RUN = %s', str(retval) ) # START_EVENT_DISPATCH kwargs = dict(params="TODO set params") cmd = AgentCommand(command=PlatformAgentEvent.START_EVENT_DISPATCH, kwargs=kwargs) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) self.assertTrue(retval.result is not None) # wait for data sample # just wait for at least one -- see consume_data above log.info("waiting for reception of a data sample...") self._async_data_result.get(timeout=DATA_TIMEOUT) self.assertTrue(len(self._samples_received) >= 1) log.info("waiting a bit more for reception of more data samples...") sleep(10) log.info("Got data samples: %d", len(self._samples_received)) # wait for event # just wait for at least one event -- see consume_event above 
log.info("waiting for reception of an event...") self._async_event_result.get(timeout=EVENT_TIMEOUT) log.info("Received events: %s", len(self._events_received)) # STOP_EVENT_DISPATCH cmd = AgentCommand(command=PlatformAgentEvent.STOP_EVENT_DISPATCH) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) self.assertTrue(retval.result is not None) # GO_INACTIVE cmd = AgentCommand(command=PlatformAgentEvent.GO_INACTIVE) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform GO_INACTIVE = %s', str(retval) ) # RESET: Resets the base platform agent, which includes termination of # its sub-platforms processes: cmd = AgentCommand(command=PlatformAgentEvent.RESET) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform RESET = %s', str(retval) ) #------------------------------- # Stop Base Platform AgentInstance #------------------------------- self.imsclient.stop_platform_agent_instance(platform_agent_instance_id=agent_instance_id)
class BulkIngestBase(object):
    """
    Base for bulk-ingest integration tests; data-specific parts live in
    subclasses.

    This class does not define the following, so they must be supplied by
    the subclass (or by another base mixed in next to this one):
      * methods: build_param_contexts, create_external_dataset,
        get_dvr_config, get_retrieve_client, setup_resources
      * attributes: name, description, EDA_NAME, EDA_MOD, EDA_CLS
      * test-case plumbing used here: _start_container, container,
        addCleanup

    Call chain:
      test_data_ingest  creates the resources, then calls
      start_agent       spawns the agent, drives it into autosample,
                        then calls
      start_listener    subscribes to events; every DatasetModified event
                        for the dataset invokes get_retrieve_client, which
                        is where a subclass checks the ingested data.

    See replacement TestPreloadThenLoadDataset. A little more declarative
    and straight-forward, but much slower (requires preload).
    """

    def setUp(self):
        """Start the container, deploy r2deploy.yml and build the clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub_management = PubsubManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(node=self.container.node)
        self.resource_registry = self.container.resource_registry

        # Both hooks below are subclass responsibilities.
        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    def build_param_contexts(self):
        """Subclass hook; its result is stored in self.context_ids by setUp."""
        raise NotImplementedError('build_param_contexts must be implemented in child classes')

    def create_external_dataset(self):
        """Subclass hook; its result is stored as self.external_dataset_id."""
        raise NotImplementedError('create_external_dataset must be implemented in child classes')

    def get_dvr_config(self):
        """Subclass hook; its result becomes the agent's 'driver_config'."""
        raise NotImplementedError('get_dvr_config must be implemented in child classes')

    def get_retrieve_client(self, dataset_id=''):
        """Subclass hook; called from the DatasetModified event callback."""
        raise NotImplementedError('get_retrieve_client must be implemented in child classes')

    def test_data_ingest(self):
        """Create every resource the ingest needs, then start the agent."""
        self.pdict_id = self.create_parameter_dict(self.name)
        self.stream_def_id = self.create_stream_def(self.name, self.pdict_id)
        self.data_product_id = self.create_data_product(self.name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)
        self.stream_id, self.route = self.get_stream_id_and_route(self.data_product_id)
        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(self.external_dataset_id)
        self.start_agent()

    def create_parameter_dict(self, name=''):
        """Create a parameter dictionary from self.context_ids and return the service result."""
        return self.dataset_management.create_parameter_dictionary(
            name=name,
            parameter_context_ids=self.context_ids,
            temporal_context='time')

    def create_stream_def(self, name='', pdict_id=''):
        """Create a stream definition bound to pdict_id and return the service result."""
        return self.pubsub_management.create_stream_definition(
            name=name,
            parameter_dictionary_id=pdict_id)

    def create_data_product(self, name='', description='', stream_def_id=''):
        """
        Create a 'Parsed_Canonical' data product, activate its persistence
        and return its id. Persistence is suspended again on test cleanup.
        """
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(
            name=name,
            description=description,
            processing_level_code='Parsed_Canonical',
            temporal_domain=tdom,
            spatial_domain=sdom)

        data_product_id = self.data_product_management.create_data_product(
            data_product=dp_obj,
            stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)
        return data_product_id

    def register_external_dataset(self, external_dataset_id=''):
        """Register the external dataset and return the service result."""
        return self.data_acquisition_management.register_external_data_set(
            external_dataset_id=external_dataset_id)

    def get_dataset_id(self, data_product_id=''):
        """Return the first 'hasDataset' object id of the data product."""
        dataset_ids, _ = self.resource_registry.find_objects(
            subject=data_product_id, predicate='hasDataset', id_only=True)
        return dataset_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        """Return (stream_id, route) for the data product's first stream."""
        stream_ids, _ = self.resource_registry.find_objects(
            data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        # self.create_logger(self.name, stream_id)
        return stream_id, route

    def start_agent(self):
        """
        Spawn the agent process, walk it through INITIALIZE, GO_ACTIVE and
        RUN, start autosampling, then hand over to start_listener.
        """
        agent_config = {
            'driver_config': self.get_dvr_config(),
            'stream_config': {},
            'agent': {'resource_id': self.external_dataset_id},
            'test_mode': True
        }

        self._ia_pid = self.container.spawn_process(
            name=self.EDA_NAME,
            module=self.EDA_MOD,
            cls=self.EDA_CLS,
            config=agent_config)

        self._ia_client = ResourceAgentClient(self.external_dataset_id, process=FakeProcess())

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=cmd)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        """Stop autosampling, reset the agent and terminate its process."""
        cmd = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)
        self.container.terminate_process(self._ia_pid)

    def start_listener(self, dataset_id=''):
        """
        Subscribe to ingest events, wait, then stop the agent.

        Two subscribers are started:
          * DatasetModified with origin=dataset_id -- each event calls
            get_retrieve_client(dataset_id=dataset_id);
          * DeviceCommonLifecycleEvent with origin
            'BaseDataHandler._acquire_sample' -- sets a local Event.

        The method blocks for up to 120 seconds on that local Event and then
        calls stop_agent whether or not the event arrived.
        """
        dataset_modified = Event()

        # callback to use retrieve to get data from the coverage
        def cb(*args, **kwargs):
            self.get_retrieve_client(dataset_id=dataset_id)

        # callback to keep execution going once dataset has been fully ingested
        def cb2(*args, **kwargs):
            dataset_modified.set()

        # Register each stop right after its start, so a failure starting the
        # second subscriber cannot leave the first one running. Cleanups run
        # last-in first-out, so the stop order (es2, then es) is unchanged.
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
        es.start()
        self.addCleanup(es.stop)

        es2 = EventSubscriber(event_type=OT.DeviceCommonLifecycleEvent, callback=cb2,
                              origin='BaseDataHandler._acquire_sample')
        es2.start()
        self.addCleanup(es2.stop)

        # let it go for up to 120 seconds, then stop the agent and reset it
        dataset_modified.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=''):
        """
        Schedule a StreamGranuleLogger process for stream_id and return the
        value schedule_process gives back.
        """
        producer_definition = ProcessDefinition(name=name + '_logger')
        producer_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger'
        }

        logger_procdef_id = self.process_dispatch_client.create_process_definition(
            process_definition=producer_definition)
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        return self.process_dispatch_client.schedule_process(
            process_definition_id=logger_procdef_id,
            configuration=configuration)
class TestPreloadThenLoadDataset(IonIntegrationTestCase):
    """
    Preload-driven variant of the external dataset ingest test: the IONLoader
    process defines the resources, and the test then drives the services that
    perform the load.
    """

    def setUp(self):
        # Container plus the standard deployment
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Run the preload with the NOSE scenario
        loader_config = {
            'op': "load",
            'scenario': "NOSE",
            'attachments': "res/preload/r2_ioc/attachments",
        }
        self.container.spawn_process("Loader",
                                     "ion.processes.bootstrap.ion_loader",
                                     "IONLoader",
                                     config=loader_config)

        self.pubsub = PubsubManagementServiceClient()
        self.dams = DataAcquisitionManagementServiceClient()

    @unittest.skip("depricated test, now in mi repo")
    def test_use_case(self):
        # By the time we get here setUp() has started the container and run
        # the preload.

        # the resources hang off the device name now, not the dataset name
        # (previously: 'Test External CTD Dataset')
        self.assert_dataset_loaded('Unit Test SMB37')

        # subscribe before triggering the load so nothing is missed
        self.do_listen_for_incoming()

        # ask the services to load the dataset
        self.do_read_dataset()

        # verify what arrived
        self.assert_data_received()

        self.do_shutdown()

    def assert_dataset_loaded(self, name):
        """
        Look up the InstrumentDevice called `name` (exactly one must exist)
        and cache the resources reachable from it: agent instance, agent,
        output data product, dataset id, stream id and stream route.
        """
        registry = self.container.resource_registry

        devices, _ = registry.find_resources(RT.InstrumentDevice, name=name, id_only=False)
        self.assertEquals(len(devices), 1)
        self.device = devices[0]

        instances, _ = registry.find_objects(subject=self.device._id,
                                             predicate=PRED.hasAgentInstance,
                                             object_type=RT.ExternalDatasetAgentInstance)
        self.agent_instance = instances[0]

        agents, _ = registry.find_objects(object_type=RT.ExternalDatasetAgent,
                                          predicate=PRED.hasAgentDefinition,
                                          subject=self.agent_instance._id)
        self.agent = agents[0]

        # Read here but not used further in this method.
        driver_cfg = self.agent_instance.driver_config

        self.data_product = registry.read_object(subject=self.device._id,
                                                 predicate=PRED.hasOutputProduct,
                                                 object_type=RT.DataProduct)

        self.dataset_id = registry.read_object(subject=self.data_product._id,
                                               predicate=PRED.hasDataset,
                                               object_type=RT.Dataset,
                                               id_only=True)

        stream_ids, _ = registry.find_objects(subject=self.data_product._id,
                                              predicate=PRED.hasStream,
                                              object_type=RT.Stream,
                                              id_only=True)
        self.stream_id = stream_ids[0]
        self.route = self.pubsub.read_stream_route(self.stream_id)

    def do_listen_for_incoming(self):
        """
        Subscribe to the data product's granules (counting all of them and
        keeping the first four) and to DatasetModified events for the
        dataset, which set self.dataset_modified.
        """
        subscription_id = self.pubsub.create_subscription(
            'validator', data_product_ids=[self.data_product._id])
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.granule_capture = []
        self.granule_count = 0

        def on_granule(msg, route, stream_id):
            self.granule_count += 1
            if self.granule_count < 5:
                self.granule_capture.append(msg)

        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        self.dataset_modified = Event()

        def cb2(*args, **kwargs):
            self.dataset_modified.set()

        # TODO: event isn't using the ExternalDataset, but a different ID for a Dataset
        modified_listener = EventSubscriber(event_type=OT.DatasetModified,
                                            callback=cb2,
                                            origin=self.dataset_id)
        modified_listener.start()
        self.addCleanup(modified_listener.stop)

    def do_read_dataset(self):
        """
        Start the external dataset agent instance, obtain a client for it
        (retrying on NotFound until MAX_AGENT_START_TIME seconds have passed)
        and drive the agent into autosample.
        """
        self.dams.start_external_dataset_agent_instance(self.agent_instance._id)

        # Open question carried over from the original author: should we
        # wait for the process to start before creating the client?
        self.client = None
        deadline = time.time() + MAX_AGENT_START_TIME
        while not (self.client or time.time() >= deadline):
            try:
                self.client = ResourceAgentClient(self.device._id, process=FakeProcess())
            except NotFound:
                time.sleep(2)

        if not self.client:
            self.fail(msg='external dataset agent process did not start in %d seconds' % MAX_AGENT_START_TIME)

        for agent_event in (ResourceAgentEvent.INITIALIZE,
                            ResourceAgentEvent.GO_ACTIVE,
                            ResourceAgentEvent.RUN):
            self.client.execute_agent(AgentCommand(command=agent_event))

        autosample = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self.client.execute_resource(command=autosample)

    def assert_data_received(self):
        """
        Require more than two granules and check the first record of the
        first captured granule against known values.
        """
        # if the dataset has not been reported modified yet, give it up to
        # 30 more seconds
        if not self.dataset_modified.is_set():
            self.dataset_modified.wait(30)

        self.assertTrue(self.granule_count > 2, msg='granule count = %d' % self.granule_count)

        rdt = RecordDictionaryTool.load_from_granule(self.granule_capture[0])
        first_record_checks = (
            ('oxygen', 0, 0.01),
            ('pressure', 309.77, 0.01),
            ('conductivity', 37.9848, 0.01),
            ('temp', 9.5163, 0.01),
            ('time', 3527207897.0, 1),
        )
        for field, expected, tolerance in first_record_checks:
            self.assertAlmostEqual(expected, rdt[field][0], delta=tolerance)

    def do_shutdown(self):
        """Stop the external dataset agent instance."""
        self.dams.stop_external_dataset_agent_instance(self.agent_instance._id)
class ExhaustiveParameterTest(IonIntegrationTestCase):
    """
    For every ParameterDictionary resource in the registry, creates a stream
    definition and a persisted data product, publishes one granule of
    generated values to it and checks that retrieval returns the same values.
    """

    def setUp(self):
        self.i = 0
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2params.yml')

        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.resource_registry = self.container.resource_registry
        self.data_retriever = DataRetrieverServiceClient()

        pdicts, _ = self.resource_registry.find_resources(restype='ParameterDictionary', id_only=False)

        # One data product per parameter dictionary; falsy ids are dropped.
        self.dp_ids = []
        for pdict in pdicts:
            stream_def_id = self.pubsub_management.create_stream_definition(
                pdict.name, parameter_dictionary_id=pdict._id)
            product_id = self.make_dp(stream_def_id)
            if product_id:
                self.dp_ids.append(product_id)

    def make_dp(self, stream_def_id):
        """
        Create a data product named after the stream definition, activate
        its persistence and return the new data product id.
        """
        temporal, spatial = [domain.dump() for domain in time_series_domain()]
        stream_def = self.resource_registry.read(stream_def_id)

        dp_obj = DataProduct(
            name=stream_def.name,
            description=stream_def.name,
            processing_level_code='Parsed_Canonical',
            temporal_domain=temporal,
            spatial_domain=spatial)

        new_id = self.data_product_management.create_data_product(
            dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(new_id)
        return new_id

    def fill_values(self, ptype, size):
        """
        Return generated values for a parameter of type `ptype`.

        The checks run in this order and the first match wins:
          ArrayType         -> list of `size` 'blah' strings
          QuantityType      -> sine of arange(size) * 2*pi/3, with the
                               arange built in ptype.value_encoding
          RecordType        -> list of `size` {'record': 'ok'} entries
          ConstantRangeType -> the tuple (1, 1000)
          ConstantType      -> the scalar 1 in ptype.value_encoding
          CategoryType      -> the first key of ptype.categories
        Any other type yields None.
        """
        if isinstance(ptype, ArrayType):
            return ['blah'] * size
        if isinstance(ptype, QuantityType):
            return np.sin(np.arange(size, dtype=ptype.value_encoding) * 2 * np.pi / 3)
        if isinstance(ptype, RecordType):
            return [{'record': 'ok'}] * size
        if isinstance(ptype, ConstantRangeType):
            return (1, 1000)
        if isinstance(ptype, ConstantType):
            return np.dtype(ptype.value_encoding).type(1)
        if isinstance(ptype, CategoryType):
            return ptype.categories.keys()[0]
        return None

    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Polls every 0.2 seconds until the newest data point has a non-fill
        time value and the dataset's first extent is at least data_size.
        The whole wait runs under a 40 second gevent.Timeout.
        '''
        with gevent.Timeout(40):
            while True:
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                time_name = rdt._pdict.temporal_parameter_name
                extents = self.dataset_management.dataset_extents(dataset_id, time_name)[0]

                # Evaluation order matters: the fill value is only looked up
                # when there is a time value to compare it with.
                enough = (rdt[time_name]
                          and rdt[time_name][0] != rdt._pdict.get_context(time_name).fill_value
                          and extents >= data_size)
                if enough:
                    return
                gevent.sleep(0.2)

    def write_to_data_product(self, data_product_id):
        """
        Publish 40 generated records to the data product, wait for them to
        be ingested, retrieve them and return the list of field names whose
        retrieved values differ from what was sent.

        Returns None (after printing a note) when the product's parameter
        dictionary has no temporal parameter.
        """
        dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids.pop()

        stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print('%s has no temporal parameter' % self.resource_registry.read(data_product_id).name)
            return

        rdt[time_param] = np.arange(40)
        for field in rdt.fields:
            if field != time_param:
                param_type = rdt._pdict.get_context(field).param_type
                rdt[field] = self.fill_values(param_type, 40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(granule)

        mismatched = []
        for field in rdt.fields:
            if np.array_equal(rdt[field], rdt_out[field]):
                continue
            print('%s' % field)
            print('%s != %s' % (rdt[field], rdt_out[field]))
            mismatched.append(field)

        return mismatched

    def test_data_products(self):
        """
        Round-trip every data product created in setUp; print a report for
        each one that failed and raise AssertionError if any did.
        """
        problems = {}
        for dp_id in self.dp_ids:
            try:
                bad_fields = self.write_to_data_product(dp_id)
                if bad_fields:
                    problems[dp_id] = "Couldn't write and retrieve %s." % bad_fields
            except:
                # Deliberately bare so that one product's failure never stops
                # the sweep. NOTE(review): the wait helper runs under
                # gevent.Timeout, which presumably is not an Exception
                # subclass -- confirm before narrowing this.
                import traceback
                problems[dp_id] = traceback.format_exc()

        for dp_id, report in problems.iteritems():
            print('----------')
            print('Problem with %s' % self.resource_registry.read(dp_id).name)
            print(report)
            print('----------')

        if problems:
            raise AssertionError('There are bad parameter dictionaries.')
class TestTransformWorker(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Instantiate a process to represent the test process = TransformWorkerTestProcess() self.dataset_management_client = DatasetManagementServiceClient( node=self.container.node) self.pubsub_client = PubsubManagementServiceClient( node=self.container.node) self.dataproductclient = DataProductManagementServiceClient( node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient( node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceProcessClient( node=self.container.node, process=process) self.time_dom, self.spatial_dom = time_series_domain() self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) def push_granule(self, data_product_id): ''' Publishes and monitors that the granule arrived ''' datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True) dataset_monitor = DatasetMonitor(datasets[0]) rdt = self.ph.rdt_for_data_product(data_product_id) self.ph.fill_parsed_rdt(rdt) self.ph.publish_rdt_to_data_product(data_product_id, rdt) assert dataset_monitor.wait() dataset_monitor.stop() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.dp_list = [] self.data_process_objs = [] self._output_stream_ids = [] self.granule_verified = Event() self.worker_assigned_event_verified = Event() self.dp_created_event_verified = Event() self.heartbeat_event_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] self.start_event_listener() # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.dp_list.append(dataprocess_id) # validate the repository for data product algorithms persists the new resources NEW SA-1 # create_data_process call created one of each dpd_ids, _ = self.rrclient.find_resources( restype=OT.DataProcessDefinition, id_only=False) # there will be more than one becuase of the DPDs that reperesent the PFs in the data product above self.assertTrue(dpd_ids is not None) dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False) # only one DP becuase the PFs that are in the code dataproduct above are not activated yet. 
self.assertEquals(len(dp_ids), 1) # validate the name and version label NEW SA - 2 dataprocessdef_obj = self.dataprocessclient.read_data_process_definition( dataprocessdef_id) self.assertEqual(dataprocessdef_obj.version_label, '1.0a') self.assertEqual(dataprocessdef_obj.name, 'add_arrays') # validate that the DPD has an attachment NEW SA - 21 attachment_ids, assoc_ids = self.rrclient.find_objects( dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True) self.assertEqual(len(attachment_ids), 1) attachment_obj = self.rrclient.read_attachment(attachment_ids[0]) log.debug('attachment: %s', attachment_obj) # validate that the data process resource has input and output data products associated # L4-CI-SA-RQ-364 and NEW SA-3 outproduct_ids, assoc_ids = self.rrclient.find_objects( dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True) self.assertEqual(len(outproduct_ids), 1) inproduct_ids, assoc_ids = self.rrclient.find_objects( dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True) self.assertEqual(len(inproduct_ids), 1) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. 
self.assertTrue(len(output_data_product_provenance) == 2) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(output_data_product_provenance[ output_data_product_id[0]]['parents'][self.input_dp_id] ['data_process_definition_id'] == dataprocessdef_id) # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in # the metadata of each output data product created by the data product algorithm. output_data_product_obj, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=False) self.assertTrue(output_data_product_obj[0].name != None) self.assertTrue(output_data_product_obj[0]._rev != None) # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route) for n in range(1, 101): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) # validate that the output granule is received and the updated value is correct 
self.assertTrue(self.granule_verified.wait(self.wait_time)) # validate that the data process loaded into worker event is received (L4-CI-SA-RQ-182) self.assertTrue( self.worker_assigned_event_verified.wait(self.wait_time)) # validate that the data process create (with data product ids) event is received (NEW SA -42) self.assertTrue(self.dp_created_event_verified.wait(self.wait_time)) # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182) #this takes a while so set wait limit to large value self.assertTrue(self.heartbeat_event_verified.wait(200)) # validate that the code from the transform function can be retrieve via inspect_data_process_definition src = self.dataprocessclient.inspect_data_process_definition( dataprocessdef_id) self.assertIn('def add_arrays(a, b)', src) # now delete the DPD and DP then verify that the resources are retired so that information required for provenance are still available self.dataprocessclient.delete_data_process(dataprocess_id) self.dataprocessclient.delete_data_process_definition( dataprocessdef_id) in_dp_objs, _ = self.rrclient.find_objects( subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True) self.assertTrue(in_dp_objs is not None) dpd_objs, _ = self.rrclient.find_subjects( subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True) self.assertTrue(dpd_objs is not None) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_instrumentdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. 
instDevice_obj, _ = self.rrclient.find_resources( restype=RT.InstrumentDevice, name='test_ctd_device') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345") instDevice_id = self.imsclient.create_instrument_device( instrument_device=instDevice_obj) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(instDevice_id in output_data_product_provenance[ self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_platformdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. 
platform_device_obj, _ = self.rrclient.find_resources( restype=RT.PlatformDevice, name='TestPlatform') if platform_device_obj: platform_device_id = platform_device_obj[0]._id else: platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345") platform_device_id = self.imsclient.create_platform_device( platform_device=platform_device_obj) self.damsclient.assign_data_product( input_resource_id=platform_device_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(platform_device_id in output_data_product_provenance[ self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[platform_device_id] ['type'] == 'PlatformDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_event_transform_worker(self): self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # test that a data process (type: data-product-in / event-out) can be defined and launched. 
# verify that event fields are correctly populated self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] # create the DPD and two DPs self.event_data_process_id = self.create_event_data_processes() # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route) self.start_event_transform_listener() self.data_modified = Event() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] 
= [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) self.assertTrue(self.event_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_bad_argument_map(self): self._output_stream_ids = [] # test that a data process (type: data-product-in / data-product-out) parameter mapping it validated during # data process creation and that the correct exception is raised for both input and output. self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() # Set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri= 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS) add_array_dpd_id = 
self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') # create the data process with invalid argument map argument_map = {"arr1": "foo", "arr2": "bar"} output_param = "salinity" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual( ex.message, "Input data product does not contain the parameters defined in argument map" ) # create the data process with invalid output parameter name argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "foo" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual( ex.message, "Output data product does not contain the output parameter name provided" ) def create_event_data_processes(self): # two data processes using one transform and one DPD argument_map = {"a": "salinity"} # set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='validate_salinity_array', description='validate_salinity_array', function='validate_salinity_array', module="ion.processes.data.transforms.test.test_transform_worker", arguments=['a'], function_type=TransformFunctionType.TRANSFORM) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='validate_salinity_array', 
description='validate_salinity_array', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array') # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=None, argument_map=argument_map) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) return dp1_data_process_id def create_data_process(self): # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" # set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri= 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, version_label='1.0a') add_array_dpd_id = self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process( 
data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) # add an attachment object to this DPD to test new SA-21 import msgpack attachment_content = 'foo bar' attachment_obj = IonObject(RT.Attachment, name='test_attachment', attachment_type=AttachmentType.ASCII, content_type='text/plain', content=msgpack.packb(attachment_content)) att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj) self.addCleanup(self.rrclient.delete_attachment, att_id) return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id def create_output_data_product(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject(RT.DataProduct, name='data_process1_data_product', description='output of add array func') dp1_func_output_dp_id = self.dataproductclient.create_data_product( dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_ids.append(stream_ids[0]) subscription_id = self.pubsub_client.create_subscription( 'validator', data_product_ids=[dp1_func_output_dp_id]) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) self.granule_verified.set() validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() 
self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__)) # if data process already created, check origin if self.dp_list: self.assertIn(data_process_event.origin, self.dp_list) # if this is a heartbeat event then 100 granules have been processed if 'data process status update.' in data_process_event.description: self.heartbeat_event_verified.set() else: # else check that this is the assign event if 'Data process assigned to transform worker' in data_process_event.description: self.worker_assigned_event_verified.set() elif 'Data process created for data product' in data_process_event.description: self.dp_created_event_verified.set() def validate_output_granule(self, msg, route, stream_id): self.assertIn(stream_id, self._output_stream_ids) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule rdt: %s', rdt) sal_val = rdt['salinity'] np.testing.assert_array_equal(sal_val, np.array([3])) def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop) def validate_transform_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. 
""" status_alert_event = args[0] np.testing.assert_array_equal(status_alert_event.origin, self.stream_id) np.testing.assert_array_equal(status_alert_event.values, np.array([self.event_data_process_id])) log.debug("DeviceStatusAlertEvent: %s", str(status_alert_event.__dict__)) self.event_verified.set() def start_event_transform_listener(self): es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent, callback=self.validate_transform_event) es.start() self.addCleanup(es.stop) def test_download(self): egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' egg_path = TransformWorker.download_egg(egg_url) import pkg_resources pkg_resources.working_set.add_entry(egg_path) from ion_example.add_arrays import add_arrays a = add_arrays(1, 2) self.assertEquals(a, 3)
class TestTransformPrime(IonIntegrationTestCase):
    """Integration tests for the TransformPrime stream process.

    The tests register an L0 parameter dictionary (raw counts) and an L1
    parameter dictionary (L0 plus parameter functions), connect an L0 and an
    L1 data product through a data process, and then check what
    TransformPrime emits for a granule of L0 data.
    """

    def setUp(self):
        """Start the container with the r2deploy services and create clients."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')  # Because hey why not?!

        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

    # ------------------------------------------------------------------
    # Resource helpers
    # ------------------------------------------------------------------

    def _create_proc_def(self):
        """Create the TransformPrime data process definition and return its id."""
        dpd_obj = DataProcessDefinition(
            name='Optimus',
            description='It\'s a transformer',
            module='ion.processes.data.transforms.transform_prime',
            class_name='TransformPrime')
        return self.data_process_management.create_data_process_definition(dpd_obj)

    def _register_context(self, ctxt, parameter_type):
        """Store parameter context *ctxt* via dataset management; return its id."""
        return self.dataset_management.create_parameter_context(
            name=ctxt.name,
            parameter_context=ctxt.dump(),
            parameter_type=parameter_type,
            unit_of_measure=ctxt.uom)

    def _L0_pdict(self):
        """Create the 'L0 SBE37' parameter dictionary and return its id.

        The dictionary holds time, lat, lon and the three raw (L0) CTD
        parameters TEMPWAT_L0, CONDWAT_L0 and PRESWAT_L0.
        """
        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self._register_context(t_ctxt, 'quantity<int64>')

        lat_ctxt = ParameterContext('lat', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))))
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt.fill_value = -9999
        lat_ctxt_id = self._register_context(lat_ctxt, 'quantity<float32>')

        lon_ctxt = ParameterContext('lon', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))))
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt.fill_value = -9999
        lon_ctxt_id = self._register_context(lon_ctxt, 'quantity<float32>')

        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        temp_ctxt.uom = 'deg_C'
        temp_ctxt.fill_value = -9999
        temp_ctxt_id = self._register_context(temp_ctxt, 'quantity<float32>')

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        cond_ctxt.uom = 'S m-1'
        cond_ctxt.fill_value = -9999
        cond_ctxt_id = self._register_context(cond_ctxt, 'quantity<float32>')

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        press_ctxt.uom = 'dbar'
        press_ctxt.fill_value = -9999
        press_ctxt_id = self._register_context(press_ctxt, 'quantity<float32>')

        context_ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id]

        return self.dataset_management.create_parameter_dictionary(
            'L0 SBE37', parameter_context_ids=context_ids, temporal_context='time')

    def _L1_pdict(self):
        """Create the 'L1_SBE37' parameter dictionary and return its id.

        A fresh L0 dictionary is created first; its contexts are reused and
        extended with the parameter functions TEMPWAT_L1, CONDWAT_L1,
        PRESWAT_L1, PRACSAL and DENSITY.
        """
        pdict_id = self._L0_pdict()
        param_context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(TEMPWAT_L0 / 10000) - 10'
        tl1_pmap = {'TEMPWAT_L0': 'TEMPWAT_L0'}
        func = NumexprFunction('TEMPWAT_L1', tl1_func, tl1_pmap)
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        param_context_ids.append(self._register_context(tempL1_ctxt, 'pfunc'))

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(CONDWAT_L0 / 100000) - 0.5'
        cl1_pmap = {'CONDWAT_L0': 'CONDWAT_L0'}
        func = NumexprFunction('CONDWAT_L1', cl1_func, cl1_pmap)
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        param_context_ids.append(self._register_context(condL1_ctxt, 'pfunc'))

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(PRESWAT_L0 * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)'
        pl1_pmap = {'PRESWAT_L0': 'PRESWAT_L0'}
        func = NumexprFunction('PRESWAT_L1', pl1_func, pl1_pmap)
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        # Pressure unit, matching PRESWAT_L0 (was 'S m-1', copied from the
        # conductivity context above).
        presL1_ctxt.uom = 'dbar'
        param_context_ids.append(self._register_context(presL1_ctxt, 'pfunc'))

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = [NumexprFunction('CONDWAT_L1*10', 'C*10', {'C': 'CONDWAT_L1'}), 'TEMPWAT_L1', 'PRESWAT_L1']
        sal_kwargmap = None
        func = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap)
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(func), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        param_context_ids.append(self._register_context(sal_ctxt, 'pfunc'))

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_func = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'lon', 'lat'], None)
        cons_temp_func = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_func, 'TEMPWAT_L1', 'PRESWAT_L1'], None)
        dens_func = PythonFunction('DENSITY', owner, 'rho', [abs_sal_func, cons_temp_func, 'PRESWAT_L1'], None)
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_func), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        param_context_ids.append(self._register_context(dens_ctxt, 'pfunc'))

        return self.dataset_management.create_parameter_dictionary(
            'L1_SBE37', parameter_context_ids=param_context_ids, temporal_context='time')

    def _data_product(self, name, stream_def, exchange_pt):
        """Create a time-series data product and return its id.

        *stream_def* and *exchange_pt* are forwarded positionally to
        ``create_data_product``.
        """
        tdom, sdom = time_series_domain()
        dp_obj = DataProduct(name=name, description='blah', spatial_domain=sdom.dump(), temporal_domain=tdom.dump())
        return self.data_product_management.create_data_product(dp_obj, stream_def, exchange_pt)

    def _data_process(self, proc_def_id, input_products, output_product, stream_def):
        """Create and activate a data process feeding *output_product*.

        The stream definition is bound to the process definition under the
        name 'output', the process is associated with a placeholder
        DataProducer, and it is then activated. Nothing is returned.
        """
        fake_producer_id = self._fake_producer()
        self.data_process_management.assign_stream_definition_to_data_process_definition(stream_def, proc_def_id, binding='output')
        data_process_id = self.data_process_management.create_data_process(proc_def_id, input_products, {'output': output_product})
        self.container.resource_registry.create_association(subject=data_process_id, predicate=PRED.hasDataProducer, object=fake_producer_id)
        self.data_process_management.activate_data_process(data_process_id)

    def _fake_producer(self):
        """Return the id of a placeholder DataProducer, creating it on first use."""
        # The guard must test the attribute that is actually assigned below;
        # the previous check for 'producer' never matched, so a new resource
        # was created on every call.
        if not hasattr(self, 'fake_producer_id'):
            self.fake_producer_id, _ = self.container.resource_registry.create(DataProducer(name='fake_producer'))
        return self.fake_producer_id

    def _publisher(self, data_product_id):
        """Return a StandaloneStreamPublisher for the stream of *data_product_id*."""
        # hasStream is an association, so it is looked up with find_objects,
        # as done in _setup_streams.
        stream_ids, _ = self.container.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        return StandaloneStreamPublisher(stream_id, route)

    def _get_param_vals(self, name, slice_, dims):
        """Return synthetic values for parameter *name*.

        The array shape is ``utils.slice_shape(slice_, dims)``. 'LAT' and
        'LON' give constant arrays (45 and -71), the three L0 parameters give
        a float32 ramp between fixed bounds, a name found in
        ``self.value_classes`` (when that attribute exists) gives a full
        slice of that entry, and any other name gives zeros.
        """
        shp = utils.slice_shape(slice_, dims)

        def _getarr(vmin, shp, vmax=None):
            if vmax is None:
                # ndarray.fill() works in place and returns None, so the
                # array has to be returned explicitly.
                const = np.empty(shp)
                const.fill(vmin)
                return const
            return np.arange(vmin, vmax, (vmax - vmin) / int(utils.prod(shp)), dtype='float32').reshape(shp)

        if name == 'LAT':
            ret = np.empty(shp)
            ret.fill(45)
        elif name == 'LON':
            ret = np.empty(shp)
            ret.fill(-71)
        elif name == 'TEMPWAT_L0':
            ret = _getarr(280000, shp, 350000)
        elif name == 'CONDWAT_L0':
            ret = _getarr(100000, shp, 750000)
        elif name == 'PRESWAT_L0':
            ret = _getarr(3000, shp, 10000)
        elif name in getattr(self, 'value_classes', {}):  # Non-L0 parameters
            # value_classes is not set anywhere in this class, hence getattr.
            ret = self.value_classes[name][:]
        else:
            return np.zeros(shp)

        return ret

    def _setup_streams(self, exchange_pt1, exchange_pt2, available_fields_in=None, available_fields_out=None):
        """Create the L0 -> L1 processing chain.

        Returns the tuple ``(stream_id_in, stream_id_out, stream_route_in,
        stream_route_out, incoming_stream_def_id, outgoing_stream_def_id)``.
        Omitted ``available_fields_*`` arguments are treated as empty lists.
        """
        # None instead of a shared mutable [] default.
        available_fields_in = [] if available_fields_in is None else available_fields_in
        available_fields_out = [] if available_fields_out is None else available_fields_out

        proc_def_id = self._create_proc_def()

        incoming_pdict_id = self._L0_pdict()
        outgoing_pdict_id = self._L1_pdict()

        incoming_stream_def_id = self.pubsub_management.create_stream_definition('L0_stream_def', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('L1_stream_def', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)

        L0_data_product_id = self._data_product('L0_SBE37', incoming_stream_def_id, exchange_pt1)
        L1_data_product_id = self._data_product('L1_SBE37', outgoing_stream_def_id, exchange_pt2)

        self._data_process(proc_def_id, [L0_data_product_id], L1_data_product_id, outgoing_stream_def_id)

        stream_ids, _ = self.container.resource_registry.find_objects(L0_data_product_id, PRED.hasStream, None, True)
        stream_id_in = stream_ids[0]

        stream_ids, _ = self.container.resource_registry.find_objects(L1_data_product_id, PRED.hasStream, None, True)
        stream_id_out = stream_ids[0]

        stream_route_in = self.pubsub_management.read_stream_route(stream_id_in)
        stream_route_out = self.pubsub_management.read_stream_route(stream_id_out)

        return (stream_id_in, stream_id_out, stream_route_in, stream_route_out, incoming_stream_def_id, outgoing_stream_def_id)

    # ------------------------------------------------------------------
    # Shared test plumbing
    # ------------------------------------------------------------------

    def _build_input_rdt(self, stream_def_in_id, dt):
        """Return a RecordDictionaryTool holding *dt* synthetic L0 records."""
        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        for field in ('TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'):
            rdt_in[field] = self._get_param_vals(field, slice(None), (dt,))
        return rdt_in

    def _spawn_transform(self, stream_id_in, stream_id_out, queue_name):
        """Spawn a TransformPrime process routing in -> out; return its pid."""
        config = {'process': {'routes': {(stream_id_in, stream_id_out): None},
                              'queue_name': queue_name,
                              'publish_streams': {str(stream_id_out): stream_id_out},
                              'process_type': 'stream_process'}}
        return self.container.spawn_process('transform_stream', 'ion.processes.data.transforms.transform_prime', 'TransformPrime', config)

    def _bind_publisher(self, pid, stream_id_in, stream_route_in):
        """Create a publisher on the input stream and bind the transform's queue to it."""
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)
        return publisher

    def _subscribe_output(self, pid, stream_id_out, stream_route_out, callback):
        """Start a subscriber on the transform's output; it is stopped on cleanup."""
        sub = StandaloneStreamSubscriber('stream_subscriber', callback)
        # The process exposes its output publisher as an attribute named
        # after the output stream id (see 'publish_streams' in the config).
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()
        return sub

    def _validate_transforms(self, rdt_in, rdt_out):
        """Assert that *rdt_out* holds the pass-through and derived L1 values."""
        # passthrus
        for field in ('time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'):
            self.assertTrue(np.allclose(rdt_in[field], rdt_out[field]))

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        t1 = (rdt_out['TEMPWAT_L0'] / 10000) - 10
        self.assertTrue(np.allclose(rdt_out['TEMPWAT_L1'], t1))

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        c1 = (rdt_out['CONDWAT_L0'] / 100000) - 0.5
        self.assertTrue(np.allclose(rdt_out['CONDWAT_L1'], c1))

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        p1 = (rdt_out['PRESWAT_L0'] * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)
        self.assertTrue(np.allclose(rdt_out['PRESWAT_L1'], p1))

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        ps = gsw.SP_from_C((rdt_out['CONDWAT_L1'] * 10.), rdt_out['TEMPWAT_L1'], rdt_out['PRESWAT_L1'])
        self.assertTrue(np.allclose(rdt_out['PRACSAL'], ps))

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        abs_sal = gsw.SA_from_SP(rdt_out['PRACSAL'], rdt_out['PRESWAT_L1'], rdt_out['lon'], rdt_out['lat'])
        cons_temp = gsw.CT_from_t(abs_sal, rdt_out['TEMPWAT_L1'], rdt_out['PRESWAT_L1'])
        rho = gsw.rho(abs_sal, cons_temp, rdt_out['PRESWAT_L1'])
        self.assertTrue(np.allclose(rdt_out['DENSITY'], rho))

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_execute_transform(self):
        """Call ``_execute_transform`` directly and validate the derived values."""
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        dt = 20
        rdt_in = self._build_input_rdt(stream_def_in_id, dt)
        msg = rdt_in.to_granule()

        pid = self._spawn_transform(stream_id_in, stream_id_out, exchange_pt1)
        try:
            rdt_out = self.container.proc_manager.procs[pid]._execute_transform(msg, (stream_id_in, stream_id_out))
            # need below to wrap result in a param val object
            rdt_out = RecordDictionaryTool.load_from_granule(rdt_out.to_granule())
            for k, v in rdt_out.iteritems():
                self.assertEqual(len(v), dt)

            self._validate_transforms(rdt_in, rdt_out)
        finally:
            # Terminate the process even when an assertion above fails.
            self.container.proc_manager.terminate_process(pid)

    def test_transform_prime_no_available_fields(self):
        """Publish through the transform with empty available-field lists.

        The spawned transform process is not explicitly terminated in this
        test.
        """
        available_fields_in = []
        available_fields_out = []
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        # launch transform
        pid = self._spawn_transform(stream_id_in, stream_id_out, exchange_pt1)

        # create publisher
        publisher = self._bind_publisher(pid, stream_id_in, stream_route_in)

        # data
        rdt_in = self._build_input_rdt(stream_def_in_id, 20)
        msg = rdt_in.to_granule()

        # publish granule to transform and have transform publish it to subscriber

        # validate transformed data
        e = gevent.event.Event()

        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEqual(set(k for k, _ in rdt_out.iteritems()), set(available_fields_out))
            for k, _ in rdt_out.iteritems():
                self.assertEqual(rdt_out[k], None)
            e.set()

        self._subscribe_output(pid, stream_id_out, stream_route_out, cb)

        # publish msg to transform
        publisher.publish(msg)

        # wait to receive msg
        self.assertTrue(e.wait(4))

    def test_transform_prime(self):
        """Publish L0 data through the transform and validate the L1 granule."""
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        # launch transform
        pid = self._spawn_transform(stream_id_in, stream_id_out, exchange_pt1)

        # create publisher
        publisher = self._bind_publisher(pid, stream_id_in, stream_route_in)

        # data
        rdt_in = self._build_input_rdt(stream_def_in_id, 20)
        msg = rdt_in.to_granule()

        # publish granule to transform and have transform publish it to subscriber

        # validate transformed data
        e = gevent.event.Event()

        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEqual(set(k for k, _ in rdt_out.iteritems()), set(available_fields_out))
            self._validate_transforms(rdt_in, rdt_out)
            e.set()

        self._subscribe_output(pid, stream_id_out, stream_route_out, cb)

        # publish msg to transform
        publisher.publish(msg)

        # wait to receive msg
        self.assertTrue(e.wait(4))
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):
    """Integration tests that drive the data product management service client.

    ``setUp`` starts a container from ``res/deploy/r2deploy.yml``, creates the
    service clients used by the tests and schedules one science granule
    ingestion worker.  ``cleaning_up`` is registered as a cleanup and cancels
    the worker and deletes the exchange queue/point named in ``setUp``.
    """

    def setUp(self):
        """Start the container, build the service clients and launch an ingestion worker."""
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.identcli = IdentityManagementServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        self.process_definitions = {}
        ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker',
        }
        process_definition_id = self.process_dispatcher.create_process_definition(
            process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------
        # First launch the ingestors
        #------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        """Cancel the processes scheduled in ``setUp`` and delete the exchange queues/points.

        Cancelling is best effort: a failure to cancel one process is logged
        and the remaining teardown still runs.
        """
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
            except Exception:
                # Only swallow ordinary errors; a bare ``except`` would also hide
                # KeyboardInterrupt and other BaseException-derived signals.
                log.debug("could not terminate the process id: %s", pid)
            else:
                log.debug("Terminated the process: %s", pid)

        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore named by the dataset ``dataset_id``."""
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)

    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):
        """Create, read, update, extend, prepare and delete data products."""
        #------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------
        # create a first data product (DP1) with geospatial bounds, bound to
        # the stream definition through keyword arguments
        #------------------------------------------
        dp_obj = IonObject(RT.DataProduct, name='DP1', description='some new dp')
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertNotEqual(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_def_ids, _ = self.rrclient.find_objects(
            dp_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)
        self.assertNotEqual(len(stream_def_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        self.assertEqual(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)

        #------------------------------------------
        # create a second data product (DP2) with the same stream definition,
        # this time passed positionally
        #------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct, name='DP2', description='some new dp')

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s', dp_id2)

        #------------------------------------------
        # make sure data product is associated with stream def
        #------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s", s)
            sdefs, _ = self.rrclient.find_objects(
                s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s", sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)

        #------------------------------------------
        # Create users then notifications to this data product for each user
        #------------------------------------------
        # user_1
        user_1 = UserInfo()
        user_1.name = 'user_1'
        user_1.contact.email = '*****@*****.**'

        # user_2
        user_2 = UserInfo()
        user_2.name = 'user_2'
        user_2.contact.email = '*****@*****.**'

        # user1 is a complete user
        self.subject = "/DC=org/DC=cilogon/C=US/O=ProtectNetwork/CN=Roger Unwin A254"
        actor_identity_obj = IonObject("ActorIdentity", {"name": self.subject})
        actor_id = self.identcli.create_actor_identity(actor_identity_obj)

        user_credentials_obj = IonObject("UserCredentials", {"name": self.subject})
        self.identcli.register_user_credentials(actor_id, user_credentials_obj)
        user_id_1 = self.identcli.create_user_info(actor_id, user_1)
        user_id_2, _ = self.rrclient.create(user_2)

        delivery_config1a = IonObject(
            OT.DeliveryConfiguration, email='*****@*****.**',
            mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        delivery_config1b = IonObject(
            OT.DeliveryConfiguration, email='*****@*****.**',
            mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        notification_request_1 = NotificationRequest(
            name="notification_1",
            origin=dp_id,
            origin_type="type_1",
            event_type=OT.ResourceLifecycleEvent,
            disabled_by_system=False,
            delivery_configurations=[delivery_config1a, delivery_config1b])

        delivery_config2a = IonObject(
            OT.DeliveryConfiguration, email='*****@*****.**',
            mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        delivery_config2b = IonObject(
            OT.DeliveryConfiguration, email='*****@*****.**',
            mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        notification_request_2 = NotificationRequest(
            name="notification_2",
            origin=dp_id,
            origin_type="type_2",
            disabled_by_system=False,
            event_type=OT.DetectionEvent,
            delivery_configurations=[delivery_config2a, delivery_config2b])

        notification_request_1_id = self.unsc.create_notification(
            notification=notification_request_1, user_id=user_id_1)
        self.unsc.create_notification(
            notification=notification_request_2, user_id=user_id_2)
        # Deleting the first notification leaves one active and one past
        # subscription, which the extension assertions below rely on.
        self.unsc.delete_notification(notification_request_1_id)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEqual(dp_obj.description, 'the very first dp')
        self.assertEqual(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        # test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        computed = extended_product.computed
        # validate that there is one active and one retired user notification
        # for this data product
        self.assertEqual(1, len(computed.active_user_subscriptions.value))
        self.assertEqual(1, len(computed.past_user_subscriptions.value))

        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         computed.product_download_size_estimated.status)
        self.assertEqual(0, computed.product_download_size_estimated.value)
        self.assertEqual(ComputedValueAvailability.PROVIDED, computed.parameters.status)

        # test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()
        associations = data_product_data.associations

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(associations['Dataset'].resources), 0)
        self.assertEqual(len(associations['StreamDefinition'].associated_resources), 0)
        self.assertEqual(len(associations['Dataset'].associated_resources), 0)

        # test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)
        associations = data_product_data.associations

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(associations['Dataset'].resources), 1)
        self.assertEqual(len(associations['StreamDefinition'].associated_resources), 1)
        self.assertEqual(associations['StreamDefinition'].associated_resources[0].s, dp_id)
        self.assertEqual(len(associations['Dataset'].associated_resources), 1)
        self.assertEqual(associations['Dataset'].associated_resources[0].s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s", dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting
        # the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEqual(len(stream_ids), 0)
        self.assertEqual(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value
        for event in events:
            log.debug("event time: %s", event.ts_created)

        self.assertGreater(len(events), 0)

    def test_data_product_stream_def(self):
        """The stream definition reported for a data product is the one it was created with."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        dp_obj = IonObject(RT.DataProduct, name='DP1', description='some new dp')
        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEqual(ctd_stream_def_id, stream_def_id)

    def test_derived_data_product(self):
        """Publish to a parent product and retrieve through a derived child product.

        The child ('TEMPWAT') is created with ``parent_data_product_id`` set and
        a stream definition restricted to the 'time' and 'temp' fields; the
        granule retrieved from the child's dataset must expose only those fields.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(
            subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT', category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        # Block until the parent dataset reports a modification.
        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb,
                             origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(
            tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEqual(set(rdt.fields), set(['time', 'temp']))

    def test_activate_suspend_data_product(self):
        """Activate, suspend and re-activate persistence of a data product.

        Granules published while persistence is active must trigger a
        DatasetModified event; a granule published while suspended must not.
        The test finally expects four InformationContentStatusEvents for the
        data product.
        """
        #------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------
        # create a new data product bound to that stream definition
        #------------------------------------------
        dp_obj = IonObject(RT.DataProduct, name='DP1', description='some new dp')
        log.debug("Created an IonObject for a data product: %s", dp_obj)

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------
        # Subscribe to persist events
        #------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        info_subscriber = EventSubscriber(
            event_type=OT.InformationContentStatusEvent,
            callback=info_event_received,
            origin=dp_id,
            auto_delete=True)
        info_subscriber.start()
        self.addCleanup(info_subscriber.stop)

        #------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(
            subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        modified_subscriber = EventSubscriber(
            event_type=OT.DatasetModified,
            callback=cb,
            origin=dataset_id,
            auto_delete=True)
        modified_subscriber.start()
        self.addCleanup(modified_subscriber.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        #------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEqual(data_product_object.name, 'DP1')
        self.assertEqual(data_product_object.description, 'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'",
                  dp_obj.name, dp_obj.description,
                  data_product_object.name, data_product_object.description)

        #------------------------------------------
        # test suspend data product persistence
        #------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        # While suspended, a published granule must not modify the dataset.
        dataset_modified.clear()
        rdt['time'] = np.arange(20, 40)
        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        # After re-activation the same granule is published again and ingested.
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEqual(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.rrclient.read(dp_id)

        # check that the four InfoStatusEvents were received
        info_event_counter = 0
        runtime = 0
        starttime = time.time()
        while info_event_counter < 4 and runtime < 60:
            # Raises if no further event arrives within the timeout.
            queue.get(timeout=60)
            info_event_counter += 1
            runtime = time.time() - starttime

        self.assertEqual(info_event_counter, 4)
class TestTransformWorkerSubscriptions(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.dataset_management_client = DatasetManagementServiceClient( node=self.container.node) self.pubsub_client = PubsubManagementServiceClient( node=self.container.node) self.dataproductclient = DataProductManagementServiceClient( node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient( node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_multi_subscriptions(self): self.dp_list = [] self.event1_verified = Event() self.event2_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_one', description='input test stream one') self.input_dp_one_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_two', description='input test stream two') self.input_dp_two_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) #retrieve the Stream for this data product stream_ids, assoc_ids = 
self.rrclient.find_objects( self.input_dp_one_id, PRED.hasStream, RT.Stream, True) self.stream_one_id = stream_ids[0] stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_two_id, PRED.hasStream, RT.Stream, True) self.stream_two_id = stream_ids[0] dpd_id = self.create_data_process_definition() dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products( ) first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id) second_dp_id = self.create_data_process_two(dpd_id, self.input_dp_two_id, dp2_func_output_dp_id) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #create subscription to stream ONE, create data process and publish granule on stream ONE #create a queue to catch the published granules of stream ONE self.subscription_one_id = self.pubsub_client.create_subscription( name='parsed_subscription_one', stream_ids=[self.stream_one_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_one_id) self.pubsub_client.activate_subscription(self.subscription_one_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_one_id) stream_route_one = self.pubsub_client.read_stream_route( self.stream_one_id) self.publisher_one = StandaloneStreamPublisher( stream_id=self.stream_one_id, stream_route=stream_route_one) self.start_event_listener() #data process 1 adds conductivity + pressure and puts the result in salinity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) #retrieve subscription from data process 
subscription_objs, _ = self.rrclient.find_objects( subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #create subscription to stream ONE and TWO, move TW subscription, create data process and publish granule on stream TWO #create a queue to catch the published granules of stream TWO self.subscription_two_id = self.pubsub_client.create_subscription( name='parsed_subscription_one_two', stream_ids=[self.stream_two_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_two_id) self.pubsub_client.activate_subscription(self.subscription_two_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_two_id) stream_route_two = self.pubsub_client.read_stream_route( self.stream_two_id) self.publisher_two = StandaloneStreamPublisher( stream_id=self.stream_two_id, stream_route=stream_route_two) #data process 1 adds conductivity + pressure and puts the result in salinity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) #data process 2 adds salinity + pressure and puts the result in conductivity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [22] rdt['pressure'] = [4] rdt['salinity'] = [1] self.publisher_two.publish(msg=rdt.to_granule(), stream_id=self.stream_two_id) self.assertTrue(self.event2_verified.wait(self.wait_time)) self.assertTrue(self.event1_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_two_transforms_inline(self): self.dp_list = [] 
self.event1_verified = Event() self.event2_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_one', description='input test stream one') self.input_dp_one_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) dpd_id = self.create_data_process_definition() dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products( ) first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id) second_dp_id = self.create_data_process_two(dpd_id, dp1_func_output_dp_id, dp2_func_output_dp_id) #retrieve subscription from data process one subscription_objs, _ = self.rrclient.find_objects( subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #retrieve the Stream for these data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_one_id, PRED.hasStream, RT.Stream, True) self.stream_one_id = stream_ids[0] #the input to data process two is the output from data process one stream_ids, assoc_ids = self.rrclient.find_objects( dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True) self.stream_two_id = stream_ids[0] # Run provenance on the output dataproduct of the second data process to see all the links # are as expected output_data_product_provenance = self.dataproductclient.get_data_product_provenance( dp2_func_output_dp_id) # Do a basic check to see if there were 2 entries in the provenance graph. Parent and Child. 
self.assertTrue(len(output_data_product_provenance) == 3) # confirm that the linking from the output dataproduct to input dataproduct is correct self.assertTrue( dp1_func_output_dp_id in output_data_product_provenance[dp2_func_output_dp_id]['parents']) self.assertTrue( self.input_dp_one_id in output_data_product_provenance[dp1_func_output_dp_id]['parents']) #create subscription to stream ONE, create data process and publish granule on stream ONE #create a queue to catch the published granules of stream ONE subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_one_id, self.stream_two_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) stream_route_one = self.pubsub_client.read_stream_route( self.stream_one_id) self.publisher_one = StandaloneStreamPublisher( stream_id=self.stream_one_id, stream_route=stream_route_one) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #data process 1 adds conductivity + pressure and puts the result in salinity #data process 2 adds salinity + pressure and puts the result in conductivity self.start_event_listener() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) self.assertTrue(self.event2_verified.wait(self.wait_time)) self.assertTrue(self.event1_verified.wait(self.wait_time)) def create_data_process_definition(self): #two data processes using 
one transform and one DPD # Set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri= 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') return add_array_dpd_id def create_data_process_one(self, data_process_definition_id, output_dataproduct): # Create the data process #data process 1 adds conductivity + pressure and puts the result in salinity argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=data_process_definition_id, inputs=[self.input_dp_one_id], outputs=[output_dataproduct], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) self.dp_list.append(dp1_data_process_id) return dp1_data_process_id def create_data_process_two(self, data_process_definition_id, input_dataproduct, output_dataproduct): # Create the data process #data process 2 adds salinity + pressure and puts the result in conductivity argument_map = {'arr1': 'salinity', 'arr2': 'pressure'} output_param = 'conductivity' dp2_func_data_process_id = self.dataprocessclient.create_data_process( 
data_process_definition_id=data_process_definition_id, inputs=[input_dataproduct], outputs=[output_dataproduct], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp2_func_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp2_func_data_process_id) self.dp_list.append(dp2_func_data_process_id) return dp2_func_data_process_id def create_output_data_products(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject(RT.DataProduct, name='data_process1_data_product', description='output of add array func') dp1_func_output_dp_id = self.dataproductclient.create_data_product( dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_one_id = stream_ids[0] dp2_func_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp2_stream', parameter_dictionary_id=self.parameter_dict_id) dp2_func_output_dp_obj = IonObject( RT.DataProduct, name='data_process2_data_product', description='output of add array func') dp2_func_output_dp_id = self.dataproductclient.create_data_product( dp2_func_output_dp_obj, dp2_func_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp2_func_output_dp_id) # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp2_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_two_id = stream_ids[0] subscription_id = self.pubsub_client.create_subscription( 'validator', data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id]) 
self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id, dp2_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__)) #if data process already created, check origin if not 'data process assigned to transform worker' in data_process_event.description: self.assertIn(data_process_event.origin, self.dp_list) def validate_output_granule(self, msg, route, stream_id): self.assertTrue( stream_id in [self._output_stream_one_id, self._output_stream_two_id]) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule stream_id: %s', stream_id) if stream_id == self._output_stream_one_id: sal_val = rdt['salinity'] log.debug('validate_output_granule sal_val: %s', sal_val) np.testing.assert_array_equal(sal_val, np.array([3])) self.event1_verified.set() else: cond_val = rdt['conductivity'] log.debug('validate_output_granule cond_val: %s', cond_val) np.testing.assert_array_equal(cond_val, np.array([5])) self.event2_verified.set() def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop)
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    """Integration test that pushes a granule through the CTDBP L0 transform.

    The test builds its own parameter dictionary, creates an input and an
    output data product on the same stream definition, launches a
    ``CTDBP_L0_all`` data process between them, publishes one granule on the
    input stream and waits for a granule to arrive on the output stream.
    """

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create service clients."""
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # Running counter for the time values inside the packets going into
        # the transform; advanced by _get_new_ctd_packet().
        self.i = 0

    def _get_new_ctd_packet(self, stream_definition_id, length):
        """Build a granule of ``length`` records for the given stream definition.

        ``time`` is the sequence ``self.i .. self.i + length - 1``; every other
        parameter whose type is ``QuantityType`` is filled with random values
        drawn from ``random.uniform(0.0, 75.0)``.  ``self.i`` is advanced by
        ``length`` so consecutive packets carry consecutive time values.

        :param stream_definition_id: id of the stream definition used to build
            the RecordDictionaryTool.
        :param length: number of records to put in the granule.
        :return: the granule produced by ``rdt.to_granule()``.
        """
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        # Time is assigned first, before any other parameter.
        rdt['time'] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            # 'time' is itself a QuantityType; without this guard the loop
            # would replace the sequential time axis with random numbers.
            if field == 'time':
                continue
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array(
                    [random.uniform(0.0, 75.0) for _ in xrange(length)])

        granule = rdt.to_granule()
        self.i += length
        return granule

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):
        """Create a parameter dictionary resource for the transform input.

        The dictionary holds a float64 ``time`` context (time axis) and float32
        ``conductivity``, ``pressure``, ``temperature``, ``density`` and
        ``salinity`` contexts.  Each context and the dictionary itself are
        registered for deletion through ``addCleanup``.

        :param parameter_dict_name: name given to the new parameter dictionary.
        :return: id of the created parameter dictionary.
        """
        pdict = ParameterDictionary()

        t_ctxt = ParameterContext(
            'time',
            param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        # The science parameters only differ by name.
        for param_name in ('conductivity', 'pressure', 'temperature',
                           'density', 'salinity'):
            ctxt = ParameterContext(
                param_name,
                param_type=QuantityType(value_encoding=numpy.dtype('float32')))
            ctxt.uom = ''
            pdict.add_context(ctxt)

        # Persist every context, then the dictionary, so that a stream
        # definition (and through it the data product stream) can be created.
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(
                pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context,
                            ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(
            parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description='Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(
            dpd_obj)
        self.addCleanup(
            self.data_process_management.delete_data_process_definition,
            dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        input_pdict_id = self._create_input_param_dict_for_test(
            parameter_dict_name='fictitious_ctdp_param_dict')

        # Create the stream definition for the stream using the parameter
        # dictionary built above.
        input_stream_def_id = self.pubsub.create_stream_definition(
            name='parsed', parameter_dictionary_id=input_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition,
                        input_stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", input_pdict_id)
        log.debug("Got the stream def for parsed input: %s", input_stream_def_id)

        # Input data product
        parsed_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        input_dp_id = self.dataproduct_management.create_data_product(
            data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        input_dp_id)

        # Output data product (shares the input stream definition)
        L0_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(
            data_product=L0_stream_dp_obj,
            stream_definition_id=input_stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        L0_stream_dp_id)

        # We need the key name here to be "L0_stream", since when the data process is launched, this name goes into
        # the config as in config.process.publish_streams.L0_stream when the config is used to launch the data process
        out_stream_ids, _ = self.resource_registry.find_objects(
            L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process(
            data_process_definition_id=dprocdef_id,
            in_data_product_ids=[input_dp_id],
            out_data_product_ids=[L0_stream_dp_id],
            configuration=None)
        self.addCleanup(self.data_process_management.delete_data_process,
                        dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process.  Cleanups run in reverse order, so the
        # process is deactivated before it is deleted.
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process,
                        dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(
            input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            # Hand the received message to the waiting test greenlet.
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub',
                                         callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
                                                 stream_ids=[output_stream_id],
                                                 exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=input_stream_def_id, length=5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        # Raises if nothing arrives on the output stream within 20 seconds.
        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s",
                  granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)

    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties

        NOTE(review): currently a placeholder that performs no checks, so the
        test only proves that *some* granule reaches the output stream.
        """
        pass
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):
    """Integration tests for the Data Product Management Service.

    ``setUp`` starts a container from r2deploy.yml, creates the service
    clients used by the tests and schedules one science granule ingestion
    worker; ``cleaning_up`` (registered with ``addCleanup``) cancels that
    worker and deletes the exchange queue and exchange point it used.
    """

    def setUp(self):
        """Start the container, build service clients and launch an ingestion worker."""
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------

        datastore_name = CACHE_DATASTORE_NAME
        self.db = self.container.datastore_manager.get_datastore(datastore_name)
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        self.process_definitions = {}
        ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(
            process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        """Best-effort teardown of everything ``setUp`` launched.

        Cancels every scheduled process (a failure to cancel one is logged and
        does not stop the teardown), cleans up ingestion subscriptions and
        deletes the recorded exchange queues and exchange points.
        """
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
                log.debug("Terminated the process: %s", pid)
            except Exception:
                # Deliberately best-effort, but do not swallow
                # SystemExit / KeyboardInterrupt the way a bare except would.
                log.debug("could not terminate the process id: %s", pid)
        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore that backs ``dataset_id``.

        :param dataset_id: id of the dataset resource to look up.
        :return: the datastore named by the dataset's ``datastore_name``.
        """
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):
        """Exercise create / read / update / extension / prepare / delete of a data product."""

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data',
            parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a first data product (DP1) on the CTD stream definition
        #------------------------------------------------------------------------------------------------

        # Generic time-series data domain creation
        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp',
                           temporal_domain=tdom.dump(),
                           spatial_domain=sdom.dump())

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertNotEqual(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_def_ids, _ = self.rrclient.find_objects(
            dp_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)
        self.assertNotEqual(len(stream_def_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        self.assertEqual(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)

        #------------------------------------------------------------------------------------------------
        # test creating a second data product (DP2) with the same stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
                           name='DP2',
                           description='some new dp',
                           temporal_domain=tdom.dump(),
                           spatial_domain=sdom.dump())

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s', dp_id2)

        #------------------------------------------------------------------------------------------------
        # make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s", s)
            sdefs, _ = self.rrclient.find_objects(
                s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s", sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEqual(dp_obj.description, 'the very first dp')
        self.assertEqual(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        # test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)

        # test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        # Two stream definitions exist at this point: the one from setUp and
        # the one created at the top of this test.
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 0)
        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 0)

        # test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 1)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['StreamDefinition'].associated_resources[0].s, dp_id)
        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['Dataset'].associated_resources[0].s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s", dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEqual(len(stream_ids), 0)
        self.assertEqual(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s", event.ts_created)

        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        """A data product reports the stream definition it was created with."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp',
                           temporal_domain=tdom,
                           spatial_domain=sdom)
        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEqual(ctd_stream_def_id, stream_def_id)

    def test_derived_data_product(self):
        """Data published to a parent product is retrievable through a derived product.

        The derived TEMPWAT product uses a stream definition restricted to
        ``time`` and ``temp``; the retrieved granule must expose exactly those
        two fields.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='Instrument DP',
                         temporal_domain=tdom.dump(),
                         spatial_domain=sdom.dump())
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(
            subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT')
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)

        # Check that the streams associated with the parent data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(
            tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEqual(set(rdt.fields), set(['time', 'temp']))

    def test_activate_suspend_data_product(self):
        """Persistence can be activated, suspended and re-activated for a data product.

        While suspended, a published granule must not trigger a
        DatasetModified event; after re-activation it must.
        """

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # create the data product on the CTD stream definition
        #------------------------------------------------------------------------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp',
                           temporal_domain=tdom,
                           spatial_domain=sdom)

        log.debug("Created an IonObject for a data product: %s", dp_obj)

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(
            subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to the data retriever
        #--------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEqual(data_product_object.name, 'DP1')
        self.assertEqual(data_product_object.description, 'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'",
                  dp_obj.name, dp_obj.description,
                  data_product_object.name, data_product_object.description)

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        rdt['time'] = np.arange(20, 40)

        publisher.publish(rdt.to_granule())
        # Nothing should be ingested while persistence is suspended.
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEqual(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            self.rrclient.read(dp_id)

    def test_lookup_values(self):
        """Lookup (stored) values are applied to ingested data and can be updated by event.

        The first granule is expected to be calibrated with ``offset_a`` = 2.0
        from ``offset_document``; after an ExternalReferencesUpdatedEvent
        pointing at ``updated_document`` the second granule is expected to be
        calibrated with ``offset_a`` = 3.0.
        """
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition(
            'lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(
            data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)

        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc, _ = self.rrclient.create_association(subject=data_product_id,
                                                    object=data_producer_id,
                                                    predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)

        self.assertEqual(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a': 2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                   predicate=PRED.hasStream,
                                                   id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))

        svm.stored_value_cas('updated_document', {'offset_a': 3.0})
        # Fresh monitor so the wait below only sees the second ingest.
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2)  # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0, 23.0]))
class BulkIngestBase(object):
    """
    Shared scaffolding for bulk-ingest tests that drive an external dataset agent.

    The class is a plain ``object`` mixin. It calls ``self._start_container``,
    ``self.container`` and ``self.addCleanup`` without defining them, so it is
    presumably combined with the project's integration TestCase
    (NOTE(review): confirm against the concrete subclasses).

    Subclasses must implement ``build_param_contexts``, ``create_external_dataset``,
    ``get_dvr_config`` and ``get_retrieve_client``. This class also uses, but does
    not define, ``setup_resources`` and the attributes ``name``, ``description``,
    ``EDA_NAME``, ``EDA_MOD`` and ``EDA_CLS``.
    """

    def setUp(self):
        """Start the container, create the service clients and build the parameter contexts."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub_management = PubsubManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.process_dispatch_client = ProcessDispatcherServiceClient(node=self.container.node)
        self.resource_registry = self.container.resource_registry

        self.context_ids = self.build_param_contexts()
        self.setup_resources()

    def build_param_contexts(self):
        """Hook whose result is stored in ``self.context_ids``; must be overridden."""
        raise NotImplementedError('build_param_contexts must be implemented in child classes')

    def create_external_dataset(self):
        """Hook whose result is used as the external dataset id; must be overridden."""
        raise NotImplementedError('create_external_dataset must be implemented in child classes')

    def get_dvr_config(self):
        """Hook whose result becomes the agent's ``driver_config``; must be overridden."""
        raise NotImplementedError('get_dvr_config must be implemented in child classes')

    def get_retrieve_client(self, dataset_id=''):
        """Hook invoked on every DatasetModified event for ``dataset_id``; must be overridden."""
        raise NotImplementedError('get_retrieve_client must be implemented in child classes')

    def test_data_ingest(self):
        """Create the resources for one data product, then start the agent and let it ingest."""
        self.pdict_id = self.create_parameter_dict(self.name)
        self.stream_def_id = self.create_stream_def(self.name, self.pdict_id)
        self.data_product_id = self.create_data_product(self.name, self.description, self.stream_def_id)
        self.dataset_id = self.get_dataset_id(self.data_product_id)
        self.stream_id, self.route = self.get_stream_id_and_route(self.data_product_id)
        self.external_dataset_id = self.create_external_dataset()
        self.data_producer_id = self.register_external_dataset(self.external_dataset_id)
        self.start_agent()

    def create_parameter_dict(self, name=''):
        """Create a parameter dictionary from ``self.context_ids`` and return what the service returns."""
        return self.dataset_management.create_parameter_dictionary(
            name=name,
            parameter_context_ids=self.context_ids,
            temporal_context='time')

    def create_stream_def(self, name='', pdict_id=''):
        """Create a stream definition bound to ``pdict_id`` and return what the service returns."""
        return self.pubsub_management.create_stream_definition(name=name, parameter_dictionary_id=pdict_id)

    def create_data_product(self, name='', description='', stream_def_id=''):
        """Create a data product, activate its persistence and return the data product id."""
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()

        dp_obj = DataProduct(
            name=name,
            description=description,
            processing_level_code='Parsed_Canonical',
            temporal_domain=tdom,
            spatial_domain=sdom)

        data_product_id = self.data_product_management.create_data_product(
            data_product=dp_obj,
            stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(data_product_id)
        return data_product_id

    def register_external_dataset(self, external_dataset_id=''):
        """Register the external dataset with data acquisition management and return the result."""
        return self.data_acquisition_management.register_external_data_set(
            external_dataset_id=external_dataset_id)

    def get_dataset_id(self, data_product_id=''):
        """Return the first object associated to the data product through ``hasDataset``."""
        # PRED constant rather than a string literal, matching get_stream_id_and_route.
        dataset_ids, _ = self.resource_registry.find_objects(
            subject=data_product_id,
            predicate=PRED.hasDataset,
            id_only=True)
        return dataset_ids[0]

    def get_stream_id_and_route(self, data_product_id):
        """Return ``(stream_id, route)`` for the first stream of the data product."""
        stream_ids, _ = self.resource_registry.find_objects(
            data_product_id, PRED.hasStream, RT.Stream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        # self.create_logger(self.name, stream_id)
        return stream_id, route

    def start_agent(self):
        """Spawn the agent, command it into autosample and then start listening."""
        agent_config = {
            'driver_config': self.get_dvr_config(),
            'stream_config': {},
            'agent': {'resource_id': self.external_dataset_id},
            'test_mode': True
        }

        # The return value of spawn_process is not needed here.
        self.container.spawn_process(
            name=self.EDA_NAME,
            module=self.EDA_MOD,
            cls=self.EDA_CLS,
            config=agent_config)

        self._ia_client = ResourceAgentClient(self.external_dataset_id, process=FakeProcess())

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        self._ia_client.execute_agent(cmd)
        cmd = AgentCommand(command=DriverEvent.START_AUTOSAMPLE)
        self._ia_client.execute_resource(command=cmd)

        self.start_listener(self.dataset_id)

    def stop_agent(self):
        """Send STOP_AUTOSAMPLE to the resource and RESET to the agent."""
        cmd = AgentCommand(command=DriverEvent.STOP_AUTOSAMPLE)
        self._ia_client.execute_resource(cmd)
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        self._ia_client.execute_agent(cmd)

    def start_listener(self, dataset_id=''):
        """
        Subscribe to dataset and device lifecycle events, wait, then stop the agent.

        Waits up to 120 seconds for a DeviceCommonLifecycleEvent with origin
        'BaseDataHandler._acquire_sample'. The agent is stopped whether or not
        that event arrived.
        """
        dataset_modified = Event()

        # callback to use retrieve to get data from the coverage
        def cb(*args, **kwargs):
            self.get_retrieve_client(dataset_id=dataset_id)

        # callback to keep execution going once dataset has been fully ingested
        def cb2(*args, **kwargs):
            dataset_modified.set()

        # Each cleanup is registered right after its subscriber starts, so a
        # failure while starting the second one cannot leak the first.
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
        es.start()
        self.addCleanup(es.stop)

        es2 = EventSubscriber(
            event_type=OT.DeviceCommonLifecycleEvent,
            callback=cb2,
            origin='BaseDataHandler._acquire_sample')
        es2.start()
        self.addCleanup(es2.stop)

        # let it go for up to 120 seconds, then stop the agent and reset it
        dataset_modified.wait(120)
        self.stop_agent()

    def create_logger(self, name, stream_id=''):
        """Schedule a StreamGranuleLogger process for ``stream_id`` and return what schedule_process returns."""
        # logger process
        producer_definition = ProcessDefinition(name=name + '_logger')
        producer_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger'
        }

        logger_procdef_id = self.process_dispatch_client.create_process_definition(
            process_definition=producer_definition)
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        pid = self.process_dispatch_client.schedule_process(
            process_definition_id=logger_procdef_id,
            configuration=configuration)
        return pid
class TestOmsLaunch(IonIntegrationTestCase):
    """
    Integration test that builds ION platform resources from the RSN OMS
    network definition, launches the base platform agent and drives it
    through its command sequence.

    Both test methods are currently decorated with ``@skip``.
    """

    def setUp(self):
        """Start the container, create service clients, load the network definition and subscribers."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()

        # Use the network definition provided by RSN OMS directly.
        rsn_oms = CIOMSClientFactory.create_instance(DVR_CONFIG['oms_uri'])
        self._network_definition = RsnOmsUtil.build_network_definition(rsn_oms)

        # get serialized version for the configuration:
        self._network_definition_ser = NetworkUtil.serialize_network_definition(self._network_definition)
        if log.isEnabledFor(logging.DEBUG):
            log.debug("NetworkDefinition serialization:\n%s", self._network_definition_ser)

        self.platformModel_id = None

        # platform_id -> dict of objects built by _prepare_platform
        self.all_platforms = {}
        # platform_id -> stream config built by _create_stream_config
        self.agent_streamconfig_map = {}

        self._async_data_result = AsyncResult()
        self._data_subscribers = []
        self._samples_received = []
        self.addCleanup(self._stop_data_subscribers)

        self._async_event_result = AsyncResult()
        self._event_subscribers = []
        self._events_received = []
        self.addCleanup(self._stop_event_subscribers)

        self._start_event_subscriber()

        self._set_up_DataProduct_obj()
        self._set_up_PlatformModel_obj()

    def _set_up_DataProduct_obj(self):
        """Create the stream definition and the DataProduct template reused for every platform."""
        # Create data product object to be used for each of the platform log streams
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        self.pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'platform_eng_parsed', id_only=True)
        self.platform_eng_stream_def_id = self.pubsubcli.create_stream_definition(
            name='platform_eng', parameter_dictionary_id=self.pdict_id)
        self.dp_obj = IonObject(RT.DataProduct,
                                name='platform_eng data',
                                description='platform_eng test',
                                temporal_domain=tdom,
                                spatial_domain=sdom)

    def _set_up_PlatformModel_obj(self):
        """Create the PlatformModel and store its id; fail the test on BadRequest."""
        platformModel_obj = IonObject(RT.PlatformModel,
                                      name='RSNPlatformModel',
                                      description="RSNPlatformModel")
        try:
            self.platformModel_id = self.imsclient.create_platform_model(platformModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new PLatformModel: %s" %ex)

        log.debug('new PlatformModel id = %s', self.platformModel_id)

    def _traverse(self, pnode, platform_id, parent_platform_objs=None):
        """
        Recursive routine that repeatedly calls _prepare_platform to build
        the object dictionary for each platform.

        @param pnode PlatformNode
        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval the dict returned by _prepare_platform at this level.
        """
        log.info("Starting _traverse for %r", platform_id)

        plat_objs = self._prepare_platform(pnode, platform_id, parent_platform_objs)
        self.all_platforms[platform_id] = plat_objs

        # now, traverse the children:
        for sub_pnode in pnode.subplatforms.itervalues():
            subplatform_id = sub_pnode.platform_id
            self._traverse(sub_pnode, subplatform_id, plat_objs)

        return plat_objs

    def _prepare_platform(self, pnode, platform_id, parent_platform_objs):
        """
        This routine generalizes the manual construction originally done in
        test_oms_launch.py. It is called by the recursive _traverse method so
        all platforms starting from a given base platform are prepared.

        Note: For simplicity in this test, sites are organized in the same
        hierarchical way as the platforms themselves.

        @param pnode PlatformNode
        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval a dict of associated objects similar to those in
                test_oms_launch
        """
        site__obj = IonObject(RT.PlatformSite,
                              name='%s_PlatformSite' % platform_id,
                              description='%s_PlatformSite platform site' % platform_id)
        site_id = self.omsclient.create_platform_site(site__obj)

        if parent_platform_objs:
            # establish hasSite association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['site_id'],
                predicate=PRED.hasSite,
                object=site_id)

        # prepare platform attributes and ports:
        monitor_attribute_objs, monitor_attribute_dicts = self._prepare_platform_attributes(pnode, platform_id)
        port_objs, port_dicts = self._prepare_platform_ports(pnode, platform_id)

        device__obj = IonObject(RT.PlatformDevice,
                                name='%s_PlatformDevice' % platform_id,
                                description='%s_PlatformDevice platform device' % platform_id,
                                # ports=port_objs,
                                # platform_monitor_attributes = monitor_attribute_objs
                                )

        # Built but not used further in this method.
        device__dict = dict(ports=port_dicts,
                            platform_monitor_attributes=monitor_attribute_dicts)

        # self.device_id is overwritten on every call, so after a traversal it
        # holds the id of the last platform device created.
        self.device_id = self.imsclient.create_platform_device(device__obj)

        self.imsclient.assign_platform_model_to_platform_device(self.platformModel_id, self.device_id)
        self.rrclient.create_association(subject=site_id, predicate=PRED.hasDevice, object=self.device_id)
        self.damsclient.register_instrument(instrument_id=self.device_id)

        if parent_platform_objs:
            # establish hasDevice association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['device_id'],
                predicate=PRED.hasDevice,
                object=self.device_id)

        agent__obj = IonObject(RT.PlatformAgent,
                               name='%s_PlatformAgent' % platform_id,
                               description='%s_PlatformAgent platform agent' % platform_id)
        agent_id = self.imsclient.create_platform_agent(agent__obj)

        if parent_platform_objs:
            # add this platform_id to parent's children:
            parent_platform_objs['children'].append(platform_id)

        self.imsclient.assign_platform_model_to_platform_agent(self.platformModel_id, agent_id)

        # The PlatformAgentInstance entries ('agent_instance_obj' and
        # 'agent_instance_id') are added later by _set_platform_agent_instances.
        plat_objs = {
            'platform_id': platform_id,
            'site__obj': site__obj,
            'site_id': site_id,
            'device__obj': device__obj,
            'device_id': self.device_id,
            'agent__obj': agent__obj,
            'agent_id': agent_id,
            'children': []
        }

        log.info("plat_objs for platform_id %r = %s", platform_id, str(plat_objs))

        stream_config = self._create_stream_config(plat_objs)
        self.agent_streamconfig_map[platform_id] = stream_config

        return plat_objs

    def _prepare_platform_attributes(self, pnode, platform_id):
        """
        Returns the PlatformMonitorAttributes objects, and their plain dict
        equivalents, corresponding to the attributes associated to the given
        platform.

        @retval tuple (list of IonObjects, list of dicts)
        """
        # TODO complete the clean-up of this method
        ret_infos = dict((n, a.defn) for (n, a) in pnode.attrs.iteritems())

        monitor_attribute_objs = []
        monitor_attribute_dicts = []
        for attrName, attrDfn in ret_infos.iteritems():
            log.debug("platform_id=%r: preparing attribute=%r", platform_id, attrName)

            monitor_rate = attrDfn['monitorCycleSeconds']
            units = attrDfn['units']

            plat_attr_obj = IonObject(OT.PlatformMonitorAttributes,
                                      id=attrName,
                                      monitor_rate=monitor_rate,
                                      units=units)
            plat_attr_dict = dict(id=attrName,
                                  monitor_rate=monitor_rate,
                                  units=units)

            monitor_attribute_objs.append(plat_attr_obj)
            monitor_attribute_dicts.append(plat_attr_dict)

        return monitor_attribute_objs, monitor_attribute_dicts

    def _prepare_platform_ports(self, pnode, platform_id):
        """
        Returns the PlatformPort objects, and their plain dict equivalents,
        corresponding to the ports associated to the given platform.

        @retval tuple (list of IonObjects, list of dicts)
        """
        # TODO complete the clean-up of this method
        port_objs = []
        port_dicts = []
        for port_id, network in pnode.ports.iteritems():
            log.debug("platform_id=%r: preparing port=%r network=%s",
                      platform_id, port_id, network)

            #
            # Note: the name "IP" address has been changed to "network" address
            # in the CI-OMS interface spec.
            #
            plat_port_obj = IonObject(OT.PlatformPort,
                                      port_id=port_id,
                                      ip_address=network)
            plat_port_dict = dict(port_id=port_id,
                                  network=network)

            port_objs.append(plat_port_obj)
            port_dicts.append(plat_port_dict)

        return port_objs, port_dicts

    def _create_stream_config(self, plat_objs):
        """
        Create the platform_eng data product for the platform described by
        plat_objs, assign it to the platform's device and return the stream
        config built for its first stream.

        @param plat_objs dict as built by _prepare_platform
        @retval stream config dict (see _build_stream_config)
        """
        platform_id = plat_objs['platform_id']
        device_id = plat_objs['device_id']

        # create the log data product
        self.dp_obj.name = '%s platform_eng data' % platform_id
        self.data_product_id = self.dpclient.create_data_product(
            data_product=self.dp_obj,
            stream_definition_id=self.platform_eng_stream_def_id)

        # Use this platform's own device id rather than the mutable self.device_id.
        self.damsclient.assign_data_product(input_resource_id=device_id,
                                            data_product_id=self.data_product_id)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(self.data_product_id, PRED.hasStream, None, True)

        stream_config = self._build_stream_config(stream_ids[0])
        return stream_config

    def _build_stream_config(self, stream_id=''):
        """Return the stream config dict (route, definition ref, parameter dictionary) for stream_id."""
        platform_eng_dictionary = DatasetManagementService.get_parameter_dictionary_by_name('platform_eng_parsed')

        # get the streamroute object from pubsub by passing the stream_id
        stream_def_ids, _ = self.rrclient.find_objects(stream_id,
                                                       PRED.hasStreamDefinition,
                                                       RT.StreamDefinition,
                                                       True)

        stream_route = self.pubsubcli.read_stream_route(stream_id=stream_id)
        stream_config = {'routing_key': stream_route.routing_key,
                         'stream_id': stream_id,
                         'stream_definition_ref': stream_def_ids[0],
                         'exchange_point': stream_route.exchange_point,
                         'parameter_dictionary': platform_eng_dictionary.dump()}

        return stream_config

    def _set_platform_agent_instances(self):
        """
        Once most of the objs/defs associated with all platforms are in
        place, this method creates and associates the PlatformAgentInstance
        elements.
        """
        self.platform_configs = {}
        for platform_id, plat_objs in self.all_platforms.iteritems():

            # The config handed to the agent instance deliberately carries no
            # stream config map; the full one is kept in self.platform_configs.
            PLATFORM_CONFIG = {
                'platform_id': platform_id,
                'agent_streamconfig_map': None,  # self.agent_streamconfig_map,
                'driver_config': DVR_CONFIG,
                'network_definition': self._network_definition_ser
            }

            self.platform_configs[platform_id] = {
                'platform_id': platform_id,
                'agent_streamconfig_map': self.agent_streamconfig_map,
                'driver_config': DVR_CONFIG,
                'network_definition': self._network_definition_ser
            }

            agent_config = {
                'platform_config': PLATFORM_CONFIG,
            }

            # _start_data_subscriber reads self.stream_id
            self.stream_id = self.agent_streamconfig_map[platform_id]['stream_id']

            agent_instance_obj = IonObject(RT.PlatformAgentInstance,
                                           name='%s_PlatformAgentInstance' % platform_id,
                                           description="%s_PlatformAgentInstance" % platform_id,
                                           agent_config=agent_config)

            agent_id = plat_objs['agent_id']
            device_id = plat_objs['device_id']

            # Bind the instance to this platform's own device. self.device_id
            # would be the last device created by the traversal, which is wrong
            # for every other platform in a hierarchy.
            agent_instance_id = self.imsclient.create_platform_agent_instance(
                agent_instance_obj, agent_id, device_id)

            plat_objs['agent_instance_obj'] = agent_instance_obj
            plat_objs['agent_instance_id'] = agent_instance_id

            stream_config = self.agent_streamconfig_map[platform_id]
            self._start_data_subscriber(agent_instance_id, stream_config)

    def _start_data_subscriber(self, stream_name, stream_config):
        """
        Starts data subscriber for the given stream_name and stream_config.

        Note: the stream id is taken from self.stream_id, not from
        stream_config.
        """

        def consume_data(message, stream_route, stream_id):
            # A callback for processing subscribed-to data.
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            self._async_data_result.set()

        log.info('_start_data_subscriber stream_name=%r', stream_name)

        stream_id = self.stream_id  # stream_config['stream_id']

        # Create subscription for the stream
        exchange_name = '%s_queue' % stream_name
        self.container.ex_manager.create_xn_queue(exchange_name).purge()
        sub = StandaloneStreamSubscriber(exchange_name, consume_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = self.pubsubcli.create_subscription(name=exchange_name, stream_ids=[stream_id])
        self.pubsubcli.activate_subscription(sub_id)
        # remembered so that _stop_data_subscribers can tear the subscription down
        sub.subscription_id = sub_id

    def _stop_data_subscribers(self):
        """
        Stop the data subscribers on cleanup.
        """
        try:
            for sub in self._data_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(sub.subscription_id)
                    except Exception as ex:
                        # Best effort: a failed deactivation must not prevent
                        # deletion. Only Exception is caught, so interrupts
                        # still propagate.
                        log.warning("deactivate_subscription failed for %r: %s",
                                    sub.subscription_id, ex)
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._data_subscribers = []

    def _start_event_subscriber(self, event_type="DeviceEvent", sub_type="platform_event"):
        """
        Starts event subscriber for events of given event_type ("DeviceEvent"
        by default) and given sub_type ("platform_event" by default).
        """

        def consume_event(evt, *args, **kwargs):
            # A callback for consuming events.
            log.info('Event subscriber received evt: %s.', str(evt))
            self._events_received.append(evt)
            self._async_event_result.set(evt)

        sub = EventSubscriber(event_type=event_type,
                              sub_type=sub_type,
                              callback=consume_event)

        sub.start()
        log.info("registered event subscriber for event_type=%r, sub_type=%r",
                 event_type, sub_type)

        self._event_subscribers.append(sub)
        sub._ready_event.wait(timeout=EVENT_TIMEOUT)

    def _stop_event_subscribers(self):
        """
        Stops the event subscribers on cleanup.
        """
        try:
            for sub in self._event_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(sub.subscription_id)
                    except Exception as ex:
                        # Best effort: a failed deactivation must not prevent
                        # deletion. Only Exception is caught, so interrupts
                        # still propagate.
                        log.warning("deactivate_subscription failed for %r: %s",
                                    sub.subscription_id, ex)
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._event_subscribers = []

    @skip("IMS does't net implement topology")
    def test_hierarchy(self):
        self._create_launch_verify(BASE_PLATFORM_ID)

    @skip("Needs alignment with recent IMS changes")
    def test_single_platform(self):
        self._create_launch_verify('LJ01D')

    def _create_launch_verify(self, base_platform_id):
        """
        Build all ION objects for the branch rooted at base_platform_id,
        launch the base platform agent, run it through INITIALIZE, GO_ACTIVE,
        RUN, START_MONITORING, STOP_MONITORING, GO_INACTIVE and RESET, and
        check that at least one data sample and one event are received.

        @param base_platform_id ID of the platform at the root of the branch
        """
        # trigger the traversal of the branch rooted at that base platform
        # to create corresponding ION objects and configuration dictionaries:
        pnode = self._network_definition.pnodes[base_platform_id]
        base_platform_objs = self._traverse(pnode, base_platform_id)

        # now that most of the topology information is there, add the
        # PlatformAgentInstance elements
        self._set_platform_agent_instances()

        base_platform_config = self.platform_configs[base_platform_id]

        log.info("base_platform_id = %r", base_platform_id)

        #-------------------------------------------------------------------------------------
        # Create Data Process Definition and Data Process for the eng stream monitor process
        #-------------------------------------------------------------------------------------
        dpd_obj = IonObject(RT.DataProcessDefinition,
                            name='DemoStreamAlertTransform',
                            description='For testing EventTriggeredTransform_B',
                            module='ion.processes.data.transforms.event_alert_transform',
                            class_name='DemoStreamAlertTransform')
        self.platform_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)

        # THERE SHOULD BE NO STREAMDEF REQUIRED HERE.
        platform_streamdef_id = self.pubsubcli.create_stream_definition(
            name='platform_eng_parsed', parameter_dictionary_id=self.pdict_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            platform_streamdef_id, self.platform_dprocdef_id, binding='output')

        config = {
            'process': {
                'timer_interval': 5,
                'queue_name': 'a_queue',
                'variable_name': 'input_voltage',
                'time_field_name': 'preferred_timestamp',
                'valid_values': [-100, 100],
                'timer_origin': 'Interval Timer'
            }
        }

        # NOTE(review): self.data_product_id is the data product of the last
        # platform prepared, not necessarily the base platform; confirm intent.
        platform_data_process_id = self.dataprocessclient.create_data_process(
            self.platform_dprocdef_id, [self.data_product_id], {}, config)
        self.dataprocessclient.activate_data_process(platform_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, platform_data_process_id)

        #-------------------------------
        # Launch Base Platform AgentInstance, connect to the resource agent client
        #-------------------------------
        agent_instance_id = base_platform_objs['agent_instance_id']
        log.debug("about to call imsclient.start_platform_agent_instance with id=%s", agent_instance_id)
        pid = self.imsclient.start_platform_agent_instance(platform_agent_instance_id=agent_instance_id)
        log.debug("start_platform_agent_instance returned pid=%s", pid)

        # wait for start
        instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
                                instance_obj.agent_process_id,
                                ProcessStateEnum.RUNNING)
        # `await` is a reserved word from Python 3.7 on, so the gate method is
        # looked up by name to keep this module parseable by newer tooling.
        self.assertTrue(getattr(gate, 'await')(90),
                        "The platform agent instance did not spawn in 90 seconds")

        # This id refers to a PlatformAgentInstance, so read it back through
        # the platform call, as done above.
        agent_instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id)
        log.debug('test_oms_create_and_launch: Platform agent instance obj: %s', str(agent_instance_obj))

        # Start a resource agent client to talk with the platform agent.
        self._pa_client = ResourceAgentClient('paclient',
                                              name=agent_instance_obj.agent_process_id,
                                              process=FakeProcess())
        log.debug(" test_oms_create_and_launch:: got pa client %s", str(self._pa_client))

        log.debug("base_platform_config =\n%s", base_platform_config)

        # ping_agent can be issued before INITIALIZE
        retval = self._pa_client.ping_agent(timeout=TIMEOUT)
        log.debug( 'Base Platform ping_agent = %s', str(retval) )

        # issue INITIALIZE command to the base platform, which will launch the
        # creation of the whole platform hierarchy rooted at base_platform_config['platform_id']
        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform INITIALIZE = %s', str(retval) )

        # GO_ACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_ACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform GO_ACTIVE = %s', str(retval) )

        # RUN:
        cmd = AgentCommand(command=PlatformAgentEvent.RUN)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform RUN = %s', str(retval) )

        # START_MONITORING:
        cmd = AgentCommand(command=PlatformAgentEvent.START_MONITORING)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform START_MONITORING = %s', str(retval) )

        # wait for data sample
        # just wait for at least one -- see consume_data above
        log.info("waiting for reception of a data sample...")
        self._async_data_result.get(timeout=DATA_TIMEOUT)
        self.assertTrue(len(self._samples_received) >= 1)

        log.info("waiting a bit more for reception of more data samples...")
        sleep(15)
        log.info("Got data samples: %d", len(self._samples_received))

        # wait for event
        # just wait for at least one event -- see consume_event above
        log.info("waiting for reception of an event...")
        self._async_event_result.get(timeout=EVENT_TIMEOUT)
        log.info("Received events: %s", len(self._events_received))

        # get the extended platform, which will include platform aggregate status fields
        # NOTE(review): self.device_id is the last device created by the
        # traversal, not necessarily the base platform's device; confirm intent.
        extended_platform = self.imsclient.get_platform_device_extension(self.device_id)

        # STOP_MONITORING:
        cmd = AgentCommand(command=PlatformAgentEvent.STOP_MONITORING)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform STOP_MONITORING = %s', str(retval) )

        # GO_INACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_INACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform GO_INACTIVE = %s', str(retval) )

        # RESET: Resets the base platform agent, which includes termination of
        # its sub-platforms processes:
        cmd = AgentCommand(command=PlatformAgentEvent.RESET)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform RESET = %s', str(retval) )

        #-------------------------------
        # Stop Base Platform AgentInstance
        #-------------------------------
        self.imsclient.stop_platform_agent_instance(platform_agent_instance_id=agent_instance_id)