class TestTransformWorker(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Instantiate a process to represent the test process=TransformWorkerTestProcess() self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node) self.pubsub_client = PubsubManagementServiceClient(node=self.container.node) self.dataproductclient = DataProductManagementServiceClient(node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process = process) self.time_dom, self.spatial_dom = time_series_domain() self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) def push_granule(self, data_product_id): ''' Publishes and monitors that the granule arrived ''' datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True) dataset_monitor = DatasetMonitor(datasets[0]) rdt = self.ph.rdt_for_data_product(data_product_id) self.ph.fill_parsed_rdt(rdt) self.ph.publish_rdt_to_data_product(data_product_id, rdt) assert dataset_monitor.wait() dataset_monitor.stop() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.dp_list = [] self.data_process_objs = [] self._output_stream_ids = [] self.granule_verified = Event() self.worker_assigned_event_verified = Event() self.dp_created_event_verified = Event() self.heartbeat_event_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] self.start_event_listener() # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.dp_list.append(dataprocess_id) # validate the repository for data product algorithms persists the new resources NEW SA-1 # create_data_process call created one of each dpd_ids, _ = self.rrclient.find_resources(restype=OT.DataProcessDefinition, id_only=False) # there will be more than one becuase of the DPDs that reperesent the PFs in the data product above self.assertTrue(dpd_ids is not None) dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False) # only one DP becuase the PFs that are in the code dataproduct above are not activated yet. self.assertEquals(len(dp_ids), 1) # validate the name and version label NEW SA - 2 dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(dataprocessdef_id) self.assertEqual(dataprocessdef_obj.version_label, '1.0a') self.assertEqual(dataprocessdef_obj.name, 'add_arrays') # validate that the DPD has an attachment NEW SA - 21 attachment_ids, assoc_ids = self.rrclient.find_objects(dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True) self.assertEqual(len(attachment_ids), 1) attachment_obj = self.rrclient.read_attachment(attachment_ids[0]) log.debug('attachment: %s', attachment_obj) # validate that the data process resource has input and output data products associated # L4-CI-SA-RQ-364 and NEW SA-3 outproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True) self.assertEqual(len(outproduct_ids), 1) inproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True) self.assertEqual(len(inproduct_ids), 1) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 2) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(output_data_product_provenance[output_data_product_id[0]]['parents'][self.input_dp_id]['data_process_definition_id'] == dataprocessdef_id) # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in # the metadata of each output data product created by the data product algorithm. output_data_product_obj,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=False) self.assertTrue(output_data_product_obj[0].name != None) self.assertTrue(output_data_product_obj[0]._rev != None) # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route ) for n in range(1, 101): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) # validate that the output granule is received and the updated value is correct self.assertTrue(self.granule_verified.wait(self.wait_time)) # validate that the data process loaded into worker event is received (L4-CI-SA-RQ-182) self.assertTrue(self.worker_assigned_event_verified.wait(self.wait_time)) # validate that the data process create (with data product ids) event is received (NEW SA -42) self.assertTrue(self.dp_created_event_verified.wait(self.wait_time)) # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182) #this takes a while so set wait limit to large value self.assertTrue(self.heartbeat_event_verified.wait(200)) # validate that the code from the transform function can be retrieve via inspect_data_process_definition src = self.dataprocessclient.inspect_data_process_definition(dataprocessdef_id) self.assertIn( 'def add_arrays(a, b)', src) # now delete the DPD and DP then verify that the resources are retired so that information required for provenance are still available self.dataprocessclient.delete_data_process(dataprocess_id) self.dataprocessclient.delete_data_process_definition(dataprocessdef_id) in_dp_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True) self.assertTrue(in_dp_objs is not None) dpd_objs, _ = self.rrclient.find_subjects(subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True) self.assertTrue(dpd_objs is not None) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_instrumentdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. instDevice_obj,_ = self.rrclient.find_resources(restype=RT.InstrumentDevice, name='test_ctd_device') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345" ) instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(instDevice_id in output_data_product_provenance[self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_platformdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. platform_device_obj,_ = self.rrclient.find_resources(restype=RT.PlatformDevice, name='TestPlatform') if platform_device_obj: platform_device_id = platform_device_obj[0]._id else: platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345" ) platform_device_id = self.imsclient.create_platform_device(platform_device=platform_device_obj) self.damsclient.assign_data_product(input_resource_id=platform_device_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(platform_device_id in output_data_product_provenance[self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[platform_device_id]['type'] == 'PlatformDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_event_transform_worker(self): self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # test that a data process (type: data-product-in / event-out) can be defined and launched. # verify that event fields are correctly populated self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] # create the DPD and two DPs self.event_data_process_id = self.create_event_data_processes() # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route ) self.start_event_transform_listener() self.data_modified = Event() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) self.assertTrue(self.event_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_bad_argument_map(self): self._output_stream_ids = [] # test that a data process (type: data-product-in / data-product-out) parameter mapping it validated during # data process creation and that the correct exception is raised for both input and output. self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() # Set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' ) # create the data process with invalid argument map argument_map = {"arr1": "foo", "arr2": "bar"} output_param = "salinity" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual(ex.message, "Input data product does not contain the parameters defined in argument map") # create the data process with invalid output parameter name argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "foo" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual(ex.message, "Output data product does not contain the output parameter name provided") def create_event_data_processes(self): # two data processes using one transform and one DPD argument_map= {"a": "salinity"} # set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='validate_salinity_array', description='validate_salinity_array', function='validate_salinity_array', module="ion.processes.data.transforms.test.test_transform_worker", arguments=['a'], function_type=TransformFunctionType.TRANSFORM ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, name='validate_salinity_array', description='validate_salinity_array', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array' ) # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=None, argument_map=argument_map) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) return dp1_data_process_id def create_data_process(self): # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" # set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, version_label='1.0a' ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' ) # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) # add an attachment object to this DPD to test new SA-21 import msgpack attachment_content = 'foo bar' attachment_obj = IonObject( RT.Attachment, name='test_attachment', attachment_type=AttachmentType.ASCII, content_type='text/plain', content=msgpack.packb(attachment_content)) att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj) self.addCleanup(self.rrclient.delete_attachment, att_id) return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id def create_output_data_product(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject( RT.DataProduct, name='data_process1_data_product', description='output of add array func', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) dp1_func_output_dp_id = self.dataproductclient.create_data_product(dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_ids.append(stream_ids[0]) subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id]) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) self.granule_verified.set() validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s" , str(data_process_event.__dict__)) # if data process already created, check origin if self.dp_list: self.assertIn( data_process_event.origin, self.dp_list) # if this is a heartbeat event then 100 granules have been processed if 'data process status update.' in data_process_event.description: self.heartbeat_event_verified.set() else: # else check that this is the assign event if 'Data process assigned to transform worker' in data_process_event.description: self.worker_assigned_event_verified.set() elif 'Data process created for data product' in data_process_event.description: self.dp_created_event_verified.set() def validate_output_granule(self, msg, route, stream_id): self.assertIn( stream_id, self._output_stream_ids) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule rdt: %s', rdt) sal_val = rdt['salinity'] np.testing.assert_array_equal(sal_val, np.array([3])) def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop) def validate_transform_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ status_alert_event = args[0] np.testing.assert_array_equal(status_alert_event.origin, self.stream_id ) np.testing.assert_array_equal(status_alert_event.values, np.array([self.event_data_process_id])) log.debug("DeviceStatusAlertEvent: %s" , str(status_alert_event.__dict__)) self.event_verified.set() def start_event_transform_listener(self): es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent, callback=self.validate_transform_event) es.start() self.addCleanup(es.stop) def test_download(self): egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' egg_path = TransformWorker.download_egg(egg_url) import pkg_resources pkg_resources.working_set.add_entry(egg_path) from ion_example.add_arrays import add_arrays a = add_arrays(1,2) self.assertEquals(a,3)
class TestIntDataProcessManagementServiceMultiOut(IonIntegrationTestCase): def setUp(self): # Start container self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.pubsubclient = PubsubManagementServiceClient(node=self.container.node) self.ingestclient = IngestionManagementServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceClient(node=self.container.node) self.dataproductclient = DataProductManagementServiceClient(node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node) self.datasetclient = DatasetManagementServiceClient(node=self.container.node) self.dataset_management = self.datasetclient def test_createDataProcess(self): #--------------------------------------------------------------------------- # Data Process Definition #--------------------------------------------------------------------------- dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all') dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) # Make assertion on the newly registered data process definition data_process_definition = self.rrclient.read(dprocdef_id) self.assertEquals(data_process_definition.name, 'ctd_L0_all') self.assertEquals(data_process_definition.description, 'transform ctd package into three separate L0 streams') self.assertEquals(data_process_definition.module, 'ion.processes.data.transforms.ctd.ctd_L0_all') self.assertEquals(data_process_definition.class_name, 'ctd_L0_all') # Read the data process definition using data process management and make assertions dprocdef_obj = self.dataprocessclient.read_data_process_definition(dprocdef_id) self.assertEquals(dprocdef_obj.class_name,'ctd_L0_all') self.assertEquals(dprocdef_obj.module,'ion.processes.data.transforms.ctd.ctd_L0_all') #--------------------------------------------------------------------------- # Create an input instrument #--------------------------------------------------------------------------- instrument_obj = IonObject(RT.InstrumentDevice, name='Inst1',description='an instrument that is creating the data product') instrument_id, rev = self.rrclient.create(instrument_obj) # Register the instrument so that the data producer and stream object are created data_producer_id = self.damsclient.register_instrument(instrument_id) # create a stream definition for the data from the ctd simulator pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) ctd_stream_def_id = self.pubsubclient.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_input_stream_definition_to_data_process_definition(ctd_stream_def_id, dprocdef_id ) # Assert that the link between the stream definition and the data process definition was done assocs = self.rrclient.find_associations(subject=dprocdef_id, predicate=PRED.hasInputStreamDefinition, object=ctd_stream_def_id, id_only=True) self.assertIsNotNone(assocs) #--------------------------------------------------------------------------- # Input Data Product #--------------------------------------------------------------------------- tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() input_dp_obj = IonObject( RT.DataProduct, name='InputDataProduct', description='some new dp', temporal_domain = tdom, spatial_domain = sdom) input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=ctd_stream_def_id, exchange_point='test') #Make assertions on the input data product created input_dp_obj = self.rrclient.read(input_dp_id) self.assertEquals(input_dp_obj.name, 'InputDataProduct') self.assertEquals(input_dp_obj.description, 'some new dp') self.damsclient.assign_data_product(instrument_id, input_dp_id) # Retrieve the stream via the DataProduct->Stream associations stream_ids, _ = self.rrclient.find_objects(input_dp_id, PRED.hasStream, None, True) self.in_stream_id = stream_ids[0] #--------------------------------------------------------------------------- # Output Data Product #--------------------------------------------------------------------------- outgoing_stream_conductivity_id = self.pubsubclient.create_stream_definition(name='conductivity', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_conductivity_id, dprocdef_id,binding='conductivity' ) outgoing_stream_pressure_id = self.pubsubclient.create_stream_definition(name='pressure', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_pressure_id, dprocdef_id, binding='pressure' ) outgoing_stream_temperature_id = self.pubsubclient.create_stream_definition(name='temperature', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_temperature_id, dprocdef_id, binding='temperature' ) self.output_products={} output_dp_obj = IonObject(RT.DataProduct, name='conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_1 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_conductivity_id) self.output_products['conductivity'] = output_dp_id_1 output_dp_obj = IonObject(RT.DataProduct, name='pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_2 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_pressure_id) self.output_products['pressure'] = output_dp_id_2 output_dp_obj = IonObject(RT.DataProduct, name='temperature', description='transform output ', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_3 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_temperature_id) self.output_products['temperature'] = output_dp_id_3 #--------------------------------------------------------------------------- # Create the data process #--------------------------------------------------------------------------- def _create_data_process(): dproc_id = self.dataprocessclient.create_data_process(dprocdef_id, [input_dp_id], self.output_products) return dproc_id dproc_id = _create_data_process() # Make assertions on the data process created data_process = self.dataprocessclient.read_data_process(dproc_id) # Assert that the data process has a process id attached self.assertIsNotNone(data_process.process_id) # Assert that the data process got the input data product's subscription id attached as its own input_susbcription_id attribute self.assertIsNotNone(data_process.input_subscription_id) output_data_product_ids = self.rrclient.find_objects(subject=dproc_id, predicate=PRED.hasOutputProduct, object_type=RT.DataProduct, id_only=True) self.assertEquals(Set(output_data_product_ids[0]), Set([output_dp_id_1,output_dp_id_2,output_dp_id_3])) @patch.dict(CFG, {'endpoint':{'receive':{'timeout': 60}}}) def test_createDataProcessUsingSim(self): #------------------------------- # Create InstrumentModel #------------------------------- instModel_obj = IonObject(RT.InstrumentModel, name='SBE37IMModel', description="SBE37IMModel" ) instModel_id = self.imsclient.create_instrument_model(instModel_obj) #------------------------------- # Create InstrumentAgent #------------------------------- instAgent_obj = IonObject(RT.InstrumentAgent, name='agent007', description="SBE37IMAgent", driver_module="mi.instrument.seabird.sbe37smb.ooicore.driver", driver_class="SBE37Driver" ) instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj) self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id) #------------------------------- # Create InstrumentDevice #------------------------------- instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDevice', description="SBE37IMDevice", serial_number="12345" ) instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj) self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id) #------------------------------- # Create InstrumentAgentInstance to hold configuration information #------------------------------- port_agent_config = { 'device_addr': 'sbe37-simulator.oceanobservatories.org', 'device_port': 4001, 'process_type': PortAgentProcessType.UNIX, 'binary_path': "port_agent", 'command_port': 4002, 'data_port': 4003, 'log_level': 5, } instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance, name='SBE37IMAgentInstance', description="SBE37IMAgentInstance", svr_addr="localhost", comms_device_address=CFG.device.sbe37.host, comms_device_port=CFG.device.sbe37.port, port_agent_config = port_agent_config) instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj, instAgent_id, instDevice_id) #------------------------------- # Create CTD Parsed as the first data product #------------------------------- # create a stream definition for the data from the ctd simulator pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) ctd_stream_def_id = self.pubsubclient.create_stream_definition(name='SBE32_CDM', parameter_dictionary_id=pdict_id) # Construct temporal and spatial Coordinate Reference System objects tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() dp_obj = IonObject(RT.DataProduct, name='ctd_parsed', description='ctd stream test', temporal_domain = tdom, spatial_domain = sdom) ctd_parsed_data_product = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product, PRED.hasStream, None, True) #------------------------------- # Create CTD Raw as the second data product #------------------------------- raw_stream_def_id = self.pubsubclient.create_stream_definition(name='SBE37_RAW', parameter_dictionary_id=pdict_id) dp_obj.name = 'ctd_raw' ctd_raw_data_product = self.dataproductclient.create_data_product(dp_obj, raw_stream_def_id) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_raw_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_raw_data_product, PRED.hasStream, None, True) #------------------------------- # L0 Conductivity - Temperature - Pressure: Data Process Definition #------------------------------- dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all') ctd_L0_all_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) #------------------------------- # L0 Conductivity - Temperature - Pressure: Output Data Products #------------------------------- outgoing_stream_l0_conductivity_id = self.pubsubclient.create_stream_definition(name='L0_Conductivity', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_conductivity_id, ctd_L0_all_dprocdef_id, binding='conductivity' ) outgoing_stream_l0_pressure_id = self.pubsubclient.create_stream_definition(name='L0_Pressure', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_pressure_id, ctd_L0_all_dprocdef_id, binding='pressure' ) outgoing_stream_l0_temperature_id = self.pubsubclient.create_stream_definition(name='L0_Temperature', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_temperature_id, ctd_L0_all_dprocdef_id, binding='temperature' ) self.output_products={} ctd_l0_conductivity_output_dp_obj = IonObject( RT.DataProduct, name='L0_Conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_conductivity_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_conductivity_output_dp_obj, outgoing_stream_l0_conductivity_id) self.output_products['conductivity'] = ctd_l0_conductivity_output_dp_id ctd_l0_pressure_output_dp_obj = IonObject(RT.DataProduct, name='L0_Pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_pressure_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_pressure_output_dp_obj, outgoing_stream_l0_pressure_id) self.output_products['pressure'] = ctd_l0_pressure_output_dp_id ctd_l0_temperature_output_dp_obj = IonObject(RT.DataProduct, name='L0_Temperature', description='transform output temperature', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_temperature_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_temperature_output_dp_obj, outgoing_stream_l0_temperature_id) self.output_products['temperature'] = ctd_l0_temperature_output_dp_id #------------------------------- # Create listener for data process events and verify that events are received. #------------------------------- # todo: add this validate for Req: L4-CI-SA-RQ-367 Data processing shall notify registered data product consumers about data processing workflow life cycle events #todo (contd) ... I believe the capability does not exist yet now. ANS And SA are not yet publishing any workflow life cycle events (Swarbhanu) #------------------------------- # L0 Conductivity - Temperature - Pressure: Create the data process #------------------------------- ctd_l0_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L0_all_dprocdef_id, [ctd_parsed_data_product], self.output_products) #------------------------------- # Retrieve a list of all data process defintions in RR and validate that the DPD is listed #------------------------------- # todo: add this validate for Req: L4-CI-SA-RQ-366 Data processing shall manage data topic definitions # todo: This capability is not yet completed (Swarbhanu) self.dataprocessclient.activate_data_process(ctd_l0_all_data_process_id) #todo: check that activate event is received L4-CI-SA-RQ-367 #todo... (it looks like no event is being published when the data process is activated... so below, we just check for now # todo... that the subscription is indeed activated) (Swarbhanu) # todo: monitor process to see if it is active (sa-rq-182) ctd_l0_all_data_process = self.rrclient.read(ctd_l0_all_data_process_id) input_subscription_id = ctd_l0_all_data_process.input_subscription_id subs = self.rrclient.read(input_subscription_id) self.assertTrue(subs.activated) # todo: This has not yet been completed by CEI, will prbly surface thru a DPMS call self.dataprocessclient.deactivate_data_process(ctd_l0_all_data_process_id) #------------------------------- # Retrieve the extended resources for data process definition and for data process #------------------------------- extended_process_definition = self.dataprocessclient.get_data_process_definition_extension(ctd_L0_all_dprocdef_id) self.assertEqual(1, len(extended_process_definition.data_processes)) log.debug("test_createDataProcess: extended_process_definition %s", str(extended_process_definition)) extended_process = self.dataprocessclient.get_data_process_extension(ctd_l0_all_data_process_id) self.assertEqual(1, len(extended_process.input_data_products)) log.debug("test_createDataProcess: extended_process %s", str(extended_process)) #------------------------------- # Cleanup #------------------------------- self.dataprocessclient.delete_data_process(ctd_l0_all_data_process_id) self.dataprocessclient.delete_data_process_definition(ctd_L0_all_dprocdef_id) self.dataprocessclient.force_delete_data_process(ctd_l0_all_data_process_id) self.dataprocessclient.force_delete_data_process_definition(ctd_L0_all_dprocdef_id)
class TestTransformWorker(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Instantiate a process to represent the test process = TransformWorkerTestProcess() self.dataset_management_client = DatasetManagementServiceClient( node=self.container.node) self.pubsub_client = PubsubManagementServiceClient( node=self.container.node) self.dataproductclient = DataProductManagementServiceClient( node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient( node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceProcessClient( node=self.container.node, process=process) self.time_dom, self.spatial_dom = time_series_domain() self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) def push_granule(self, data_product_id): ''' Publishes and monitors that the granule arrived ''' datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True) dataset_monitor = DatasetMonitor(datasets[0]) rdt = self.ph.rdt_for_data_product(data_product_id) self.ph.fill_parsed_rdt(rdt) self.ph.publish_rdt_to_data_product(data_product_id, rdt) assert dataset_monitor.wait() dataset_monitor.stop() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.dp_list = [] self.data_process_objs = [] self._output_stream_ids = [] self.granule_verified = Event() self.worker_assigned_event_verified = Event() self.dp_created_event_verified = Event() self.heartbeat_event_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] self.start_event_listener() # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.dp_list.append(dataprocess_id) # validate the repository for data product algorithms persists the new resources NEW SA-1 # create_data_process call created one of each dpd_ids, _ = self.rrclient.find_resources( restype=OT.DataProcessDefinition, id_only=False) # there will be more than one becuase of the DPDs that reperesent the PFs in the data product above self.assertTrue(dpd_ids is not None) dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False) # only one DP becuase the PFs that are in the code dataproduct above are not activated yet. self.assertEquals(len(dp_ids), 1) # validate the name and version label NEW SA - 2 dataprocessdef_obj = self.dataprocessclient.read_data_process_definition( dataprocessdef_id) self.assertEqual(dataprocessdef_obj.version_label, '1.0a') self.assertEqual(dataprocessdef_obj.name, 'add_arrays') # validate that the DPD has an attachment NEW SA - 21 attachment_ids, assoc_ids = self.rrclient.find_objects( dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True) self.assertEqual(len(attachment_ids), 1) attachment_obj = self.rrclient.read_attachment(attachment_ids[0]) log.debug('attachment: %s', attachment_obj) # validate that the data process resource has input and output data products associated # L4-CI-SA-RQ-364 and NEW SA-3 outproduct_ids, assoc_ids = self.rrclient.find_objects( dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True) self.assertEqual(len(outproduct_ids), 1) inproduct_ids, assoc_ids = self.rrclient.find_objects( dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True) self.assertEqual(len(inproduct_ids), 1) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 2) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(output_data_product_provenance[ output_data_product_id[0]]['parents'][self.input_dp_id] ['data_process_definition_id'] == dataprocessdef_id) # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in # the metadata of each output data product created by the data product algorithm. output_data_product_obj, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=False) self.assertTrue(output_data_product_obj[0].name != None) self.assertTrue(output_data_product_obj[0]._rev != None) # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route) for n in range(1, 101): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) # validate that the output granule is received and the updated value is correct self.assertTrue(self.granule_verified.wait(self.wait_time)) # validate that the data process loaded into worker event is received (L4-CI-SA-RQ-182) self.assertTrue( self.worker_assigned_event_verified.wait(self.wait_time)) # validate that the data process create (with data product ids) event is received (NEW SA -42) self.assertTrue(self.dp_created_event_verified.wait(self.wait_time)) # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182) #this takes a while so set wait limit to large value self.assertTrue(self.heartbeat_event_verified.wait(200)) # validate that the code from the transform function can be retrieve via inspect_data_process_definition src = self.dataprocessclient.inspect_data_process_definition( dataprocessdef_id) self.assertIn('def add_arrays(a, b)', src) # now delete the DPD and DP then verify that the resources are retired so that information required for provenance are still available self.dataprocessclient.delete_data_process(dataprocess_id) self.dataprocessclient.delete_data_process_definition( dataprocessdef_id) in_dp_objs, _ = self.rrclient.find_objects( subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True) self.assertTrue(in_dp_objs is not None) dpd_objs, _ = self.rrclient.find_subjects( subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True) self.assertTrue(dpd_objs is not None) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_instrumentdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. instDevice_obj, _ = self.rrclient.find_resources( restype=RT.InstrumentDevice, name='test_ctd_device') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345") instDevice_id = self.imsclient.create_instrument_device( instrument_device=instDevice_obj) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(instDevice_id in output_data_product_provenance[ self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_platformdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. platform_device_obj, _ = self.rrclient.find_resources( restype=RT.PlatformDevice, name='TestPlatform') if platform_device_obj: platform_device_id = platform_device_obj[0]._id else: platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345") platform_device_id = self.imsclient.create_platform_device( platform_device=platform_device_obj) self.damsclient.assign_data_product( input_resource_id=platform_device_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(platform_device_id in output_data_product_provenance[ self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[platform_device_id] ['type'] == 'PlatformDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_event_transform_worker(self): self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # test that a data process (type: data-product-in / event-out) can be defined and launched. # verify that event fields are correctly populated self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] # create the DPD and two DPs self.event_data_process_id = self.create_event_data_processes() # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route) self.start_event_transform_listener() self.data_modified = Event() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) self.assertTrue(self.event_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_bad_argument_map(self): self._output_stream_ids = [] # test that a data process (type: data-product-in / data-product-out) parameter mapping it validated during # data process creation and that the correct exception is raised for both input and output. self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() # Set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri= 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS) add_array_dpd_id = self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') # create the data process with invalid argument map argument_map = {"arr1": "foo", "arr2": "bar"} output_param = "salinity" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual( ex.message, "Input data product does not contain the parameters defined in argument map" ) # create the data process with invalid output parameter name argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "foo" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual( ex.message, "Output data product does not contain the output parameter name provided" ) def create_event_data_processes(self): # two data processes using one transform and one DPD argument_map = {"a": "salinity"} # set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='validate_salinity_array', description='validate_salinity_array', function='validate_salinity_array', module="ion.processes.data.transforms.test.test_transform_worker", arguments=['a'], function_type=TransformFunctionType.TRANSFORM) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='validate_salinity_array', description='validate_salinity_array', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array') # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=None, argument_map=argument_map) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) return dp1_data_process_id def create_data_process(self): # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" # set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri= 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, version_label='1.0a') add_array_dpd_id = self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) # add an attachment object to this DPD to test new SA-21 import msgpack attachment_content = 'foo bar' attachment_obj = IonObject(RT.Attachment, name='test_attachment', attachment_type=AttachmentType.ASCII, content_type='text/plain', content=msgpack.packb(attachment_content)) att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj) self.addCleanup(self.rrclient.delete_attachment, att_id) return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id def create_output_data_product(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject(RT.DataProduct, name='data_process1_data_product', description='output of add array func') dp1_func_output_dp_id = self.dataproductclient.create_data_product( dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_ids.append(stream_ids[0]) subscription_id = self.pubsub_client.create_subscription( 'validator', data_product_ids=[dp1_func_output_dp_id]) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) self.granule_verified.set() validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__)) # if data process already created, check origin if self.dp_list: self.assertIn(data_process_event.origin, self.dp_list) # if this is a heartbeat event then 100 granules have been processed if 'data process status update.' in data_process_event.description: self.heartbeat_event_verified.set() else: # else check that this is the assign event if 'Data process assigned to transform worker' in data_process_event.description: self.worker_assigned_event_verified.set() elif 'Data process created for data product' in data_process_event.description: self.dp_created_event_verified.set() def validate_output_granule(self, msg, route, stream_id): self.assertIn(stream_id, self._output_stream_ids) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule rdt: %s', rdt) sal_val = rdt['salinity'] np.testing.assert_array_equal(sal_val, np.array([3])) def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop) def validate_transform_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ status_alert_event = args[0] np.testing.assert_array_equal(status_alert_event.origin, self.stream_id) np.testing.assert_array_equal(status_alert_event.values, np.array([self.event_data_process_id])) log.debug("DeviceStatusAlertEvent: %s", str(status_alert_event.__dict__)) self.event_verified.set() def start_event_transform_listener(self): es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent, callback=self.validate_transform_event) es.start() self.addCleanup(es.stop) def test_download(self): egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' egg_path = TransformWorker.download_egg(egg_url) import pkg_resources pkg_resources.working_set.add_entry(egg_path) from ion_example.add_arrays import add_arrays a = add_arrays(1, 2) self.assertEquals(a, 3)
class TestIntDataProcessManagementServiceMultiOut(IonIntegrationTestCase): def setUp(self): # Start container self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.pubsubclient = PubsubManagementServiceClient(node=self.container.node) self.ingestclient = IngestionManagementServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceClient(node=self.container.node) self.dataproductclient = DataProductManagementServiceClient(node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node) self.datasetclient = DatasetManagementServiceClient(node=self.container.node) self.dataset_management = self.datasetclient self.process_dispatcher = ProcessDispatcherServiceClient(node=self.container.node) def test_createDataProcess(self): #--------------------------------------------------------------------------- # Data Process Definition #--------------------------------------------------------------------------- dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all') dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) # Make assertion on the newly registered data process definition data_process_definition = self.rrclient.read(dprocdef_id) self.assertEquals(data_process_definition.name, 'ctd_L0_all') self.assertEquals(data_process_definition.description, 'transform ctd package into three separate L0 streams') self.assertEquals(data_process_definition.module, 'ion.processes.data.transforms.ctd.ctd_L0_all') self.assertEquals(data_process_definition.class_name, 'ctd_L0_all') # Read the data process definition using data process management and make assertions dprocdef_obj = self.dataprocessclient.read_data_process_definition(dprocdef_id) self.assertEquals(dprocdef_obj.class_name,'ctd_L0_all') self.assertEquals(dprocdef_obj.module,'ion.processes.data.transforms.ctd.ctd_L0_all') #--------------------------------------------------------------------------- # Create an input instrument #--------------------------------------------------------------------------- instrument_obj = IonObject(RT.InstrumentDevice, name='Inst1',description='an instrument that is creating the data product') instrument_id, rev = self.rrclient.create(instrument_obj) # Register the instrument so that the data producer and stream object are created data_producer_id = self.damsclient.register_instrument(instrument_id) # create a stream definition for the data from the ctd simulator pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) ctd_stream_def_id = self.pubsubclient.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_input_stream_definition_to_data_process_definition(ctd_stream_def_id, dprocdef_id ) # Assert that the link between the stream definition and the data process definition was done assocs = self.rrclient.find_associations(subject=dprocdef_id, predicate=PRED.hasInputStreamDefinition, object=ctd_stream_def_id, id_only=True) self.assertIsNotNone(assocs) #--------------------------------------------------------------------------- # Input Data Product #--------------------------------------------------------------------------- tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() input_dp_obj = IonObject( RT.DataProduct, name='InputDataProduct', description='some new dp', temporal_domain = tdom, spatial_domain = sdom) input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=ctd_stream_def_id, exchange_point='test') #Make assertions on the input data product created input_dp_obj = self.rrclient.read(input_dp_id) self.assertEquals(input_dp_obj.name, 'InputDataProduct') self.assertEquals(input_dp_obj.description, 'some new dp') self.damsclient.assign_data_product(instrument_id, input_dp_id) # Retrieve the stream via the DataProduct->Stream associations stream_ids, _ = self.rrclient.find_objects(input_dp_id, PRED.hasStream, None, True) self.in_stream_id = stream_ids[0] #--------------------------------------------------------------------------- # Output Data Product #--------------------------------------------------------------------------- outgoing_stream_conductivity_id = self.pubsubclient.create_stream_definition(name='conductivity', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_conductivity_id, dprocdef_id,binding='conductivity' ) outgoing_stream_pressure_id = self.pubsubclient.create_stream_definition(name='pressure', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_pressure_id, dprocdef_id, binding='pressure' ) outgoing_stream_temperature_id = self.pubsubclient.create_stream_definition(name='temperature', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_temperature_id, dprocdef_id, binding='temperature' ) self.output_products={} output_dp_obj = IonObject(RT.DataProduct, name='conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_1 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_conductivity_id) self.output_products['conductivity'] = output_dp_id_1 output_dp_obj = IonObject(RT.DataProduct, name='pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_2 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_pressure_id) self.output_products['pressure'] = output_dp_id_2 output_dp_obj = IonObject(RT.DataProduct, name='temperature', description='transform output ', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_3 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_temperature_id) self.output_products['temperature'] = output_dp_id_3 #--------------------------------------------------------------------------- # Create the data process #--------------------------------------------------------------------------- def _create_data_process(): dproc_id = self.dataprocessclient.create_data_process(dprocdef_id, [input_dp_id], self.output_products) return dproc_id dproc_id = _create_data_process() # Make assertions on the data process created data_process = self.dataprocessclient.read_data_process(dproc_id) # Assert that the data process has a process id attached self.assertIsNotNone(data_process.process_id) # Assert that the data process got the input data product's subscription id attached as its own input_susbcription_id attribute self.assertIsNotNone(data_process.input_subscription_id) output_data_product_ids = self.rrclient.find_objects(subject=dproc_id, predicate=PRED.hasOutputProduct, object_type=RT.DataProduct, id_only=True) self.assertEquals(Set(output_data_product_ids[0]), Set([output_dp_id_1,output_dp_id_2,output_dp_id_3])) @patch.dict(CFG, {'endpoint':{'receive':{'timeout': 60}}}) def test_createDataProcessUsingSim(self): #------------------------------- # Create InstrumentModel #------------------------------- instModel_obj = IonObject(RT.InstrumentModel, name='SBE37IMModel', description="SBE37IMModel" ) instModel_id = self.imsclient.create_instrument_model(instModel_obj) #------------------------------- # Create InstrumentAgent #------------------------------- instAgent_obj = IonObject(RT.InstrumentAgent, name='agent007', description="SBE37IMAgent", driver_uri="http://sddevrepo.oceanobservatories.org/releases/seabird_sbe37smb_ooicore-0.0.1-py2.7.egg") instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj) self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id) #------------------------------- # Create InstrumentDevice #------------------------------- instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDevice', description="SBE37IMDevice", serial_number="12345" ) instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj) self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id) #------------------------------- # Create InstrumentAgentInstance to hold configuration information #------------------------------- port_agent_config = { 'device_addr': 'sbe37-simulator.oceanobservatories.org', 'device_port': 4001, 'process_type': PortAgentProcessType.UNIX, 'binary_path': "port_agent", 'command_port': 4002, 'data_port': 4003, 'log_level': 5, } port_agent_config = { 'device_addr': CFG.device.sbe37.host, 'device_port': CFG.device.sbe37.port, 'process_type': PortAgentProcessType.UNIX, 'binary_path': "port_agent", 'port_agent_addr': 'localhost', 'command_port': CFG.device.sbe37.port_agent_cmd_port, 'data_port': CFG.device.sbe37.port_agent_data_port, 'log_level': 5, 'type': PortAgentType.ETHERNET } instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance, name='SBE37IMAgentInstance', description="SBE37IMAgentInstance", port_agent_config = port_agent_config) instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj, instAgent_id, instDevice_id) #------------------------------- # Create CTD Parsed as the first data product #------------------------------- # create a stream definition for the data from the ctd simulator pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) ctd_stream_def_id = self.pubsubclient.create_stream_definition(name='SBE32_CDM', parameter_dictionary_id=pdict_id) # Construct temporal and spatial Coordinate Reference System objects tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() dp_obj = IonObject(RT.DataProduct, name='ctd_parsed', description='ctd stream test', temporal_domain = tdom, spatial_domain = sdom) ctd_parsed_data_product = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product, PRED.hasStream, None, True) #------------------------------- # Create CTD Raw as the second data product #------------------------------- raw_stream_def_id = self.pubsubclient.create_stream_definition(name='SBE37_RAW', parameter_dictionary_id=pdict_id) dp_obj.name = 'ctd_raw' ctd_raw_data_product = self.dataproductclient.create_data_product(dp_obj, raw_stream_def_id) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_raw_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_raw_data_product, PRED.hasStream, None, True) #------------------------------- # L0 Conductivity - Temperature - Pressure: Data Process Definition #------------------------------- dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all') ctd_L0_all_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) #------------------------------- # L0 Conductivity - Temperature - Pressure: Output Data Products #------------------------------- outgoing_stream_l0_conductivity_id = self.pubsubclient.create_stream_definition(name='L0_Conductivity', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_conductivity_id, ctd_L0_all_dprocdef_id, binding='conductivity' ) outgoing_stream_l0_pressure_id = self.pubsubclient.create_stream_definition(name='L0_Pressure', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_pressure_id, ctd_L0_all_dprocdef_id, binding='pressure' ) outgoing_stream_l0_temperature_id = self.pubsubclient.create_stream_definition(name='L0_Temperature', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_temperature_id, ctd_L0_all_dprocdef_id, binding='temperature' ) self.output_products={} ctd_l0_conductivity_output_dp_obj = IonObject( RT.DataProduct, name='L0_Conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_conductivity_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_conductivity_output_dp_obj, outgoing_stream_l0_conductivity_id) self.output_products['conductivity'] = ctd_l0_conductivity_output_dp_id ctd_l0_pressure_output_dp_obj = IonObject(RT.DataProduct, name='L0_Pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_pressure_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_pressure_output_dp_obj, outgoing_stream_l0_pressure_id) self.output_products['pressure'] = ctd_l0_pressure_output_dp_id ctd_l0_temperature_output_dp_obj = IonObject(RT.DataProduct, name='L0_Temperature', description='transform output temperature', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_temperature_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_temperature_output_dp_obj, outgoing_stream_l0_temperature_id) self.output_products['temperature'] = ctd_l0_temperature_output_dp_id #------------------------------- # Create listener for data process events and verify that events are received. #------------------------------- # todo: add this validate for Req: L4-CI-SA-RQ-367 Data processing shall notify registered data product consumers about data processing workflow life cycle events #todo (contd) ... I believe the capability does not exist yet now. ANS And SA are not yet publishing any workflow life cycle events (Swarbhanu) #------------------------------- # L0 Conductivity - Temperature - Pressure: Create the data process #------------------------------- ctd_l0_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L0_all_dprocdef_id, [ctd_parsed_data_product], self.output_products) data_process = self.rrclient.read(ctd_l0_all_data_process_id) process_id = data_process.process_id self.addCleanup(self.process_dispatcher.cancel_process, process_id) #------------------------------- # Wait until the process launched in the create_data_process() method is actually running, before proceeding further in this test #------------------------------- gate = ProcessStateGate(self.process_dispatcher.read_process, process_id, ProcessStateEnum.RUNNING) self.assertTrue(gate.await(30), "The data process (%s) did not spawn in 30 seconds" % process_id) #------------------------------- # Retrieve a list of all data process defintions in RR and validate that the DPD is listed #------------------------------- # todo: Req: L4-CI-SA-RQ-366 Data processing shall manage data topic definitions # todo: data topics are being handled by pub sub at the level of streams self.dataprocessclient.activate_data_process(ctd_l0_all_data_process_id) #todo: check that activate event is received L4-CI-SA-RQ-367 #todo... (it looks like no event is being published when the data process is activated... so below, we just check for now # todo... that the subscription is indeed activated) (Swarbhanu) # todo: monitor process to see if it is active (sa-rq-182) ctd_l0_all_data_process = self.rrclient.read(ctd_l0_all_data_process_id) input_subscription_id = ctd_l0_all_data_process.input_subscription_id subs = self.rrclient.read(input_subscription_id) self.assertTrue(subs.activated) # todo: This has not yet been completed by CEI, will prbly surface thru a DPMS call self.dataprocessclient.deactivate_data_process(ctd_l0_all_data_process_id) #------------------------------- # Retrieve the extended resources for data process definition and for data process #------------------------------- extended_process_definition = self.dataprocessclient.get_data_process_definition_extension(ctd_L0_all_dprocdef_id) self.assertEqual(1, len(extended_process_definition.data_processes)) log.debug("test_createDataProcess: extended_process_definition %s", str(extended_process_definition)) extended_process = self.dataprocessclient.get_data_process_extension(ctd_l0_all_data_process_id) self.assertEqual(1, len(extended_process.input_data_products)) log.debug("test_createDataProcess: extended_process %s", str(extended_process)) ################################ Test the removal of data processes ################################## #------------------------------------------------------------------- # Test the deleting of the data process #------------------------------------------------------------------- # Before deleting, get the input streams, output streams and the subscriptions so that they can be checked after deleting # dp_obj_1 = self.rrclient.read(ctd_l0_all_data_process_id) # input_subscription_id = dp_obj_1.input_subscription_id # out_prods, _ = self.rrclient.find_objects(subject=ctd_l0_all_data_process_id, predicate=PRED.hasOutputProduct, id_only=True) # in_prods, _ = self.rrclient.find_objects(ctd_l0_all_data_process_id, PRED.hasInputProduct, id_only=True) # in_streams = [] # for in_prod in in_prods: # streams, _ = self.rrclient.find_objects(in_prod, PRED.hasStream, id_only=True) # in_streams.extend(streams) # out_streams = [] # for out_prod in out_prods: # streams, _ = self.rrclient.find_objects(out_prod, PRED.hasStream, id_only=True) # out_streams.extend(streams) # Deleting the data process self.dataprocessclient.delete_data_process(ctd_l0_all_data_process_id) # Check that the data process got removed. Check the lcs state. It should be retired dp_obj = self.rrclient.read(ctd_l0_all_data_process_id) self.assertEquals(dp_obj.lcstate, LCS.RETIRED) # Check for process defs still attached to the data process dpd_assn_ids = self.rrclient.find_associations(subject=ctd_l0_all_data_process_id, predicate=PRED.hasProcessDefinition, id_only=True) self.assertEquals(len(dpd_assn_ids), 0) # Check for output data product still attached to the data process out_products, assocs = self.rrclient.find_objects(subject=ctd_l0_all_data_process_id, predicate=PRED.hasOutputProduct, id_only=True) self.assertEquals(len(out_products), 0) self.assertEquals(len(assocs), 0) # Check for input data products still attached to the data process inprod_associations = self.rrclient.find_associations(ctd_l0_all_data_process_id, PRED.hasInputProduct) self.assertEquals(len(inprod_associations), 0) # Check for input data products still attached to the data process inprod_associations = self.rrclient.find_associations(ctd_l0_all_data_process_id, PRED.hasInputProduct) self.assertEquals(len(inprod_associations), 0) # Check of the data process has been deactivated self.assertIsNone(dp_obj.input_subscription_id) # Read the original subscription id of the data process and check that it has been deactivated with self.assertRaises(NotFound): self.pubsubclient.read_subscription(input_subscription_id) #------------------------------------------------------------------- # Delete the data process definition #------------------------------------------------------------------- # before deleting, get the process definition being associated to in order to be able to check later if the latter gets deleted as it should proc_def_ids, proc_def_asocs = self.rrclient.find_objects(ctd_l0_all_data_process_id, PRED.hasProcessDefinition) self.dataprocessclient.delete_data_process_definition(ctd_L0_all_dprocdef_id) # check that the data process definition has been retired dp_proc_def = self.rrclient.read(ctd_L0_all_dprocdef_id) self.assertEquals(dp_proc_def.lcstate, LCS.RETIRED) # Check for old associations of this data process definition proc_defs, proc_def_asocs = self.rrclient.find_objects(ctd_L0_all_dprocdef_id, PRED.hasProcessDefinition) self.assertEquals(len(proc_defs), 0) # find all associations where this is the subject _, obj_assns = self.rrclient.find_objects(subject= ctd_L0_all_dprocdef_id, id_only=True) self.assertEquals(len(obj_assns), 0) ################################ Test the removal of data processes ################################## # Try force delete... This should simply delete the associations and the data process object # from the resource registry #--------------------------------------------------------------------------------------------------------------- # Force deleting a data process #--------------------------------------------------------------------------------------------------------------- self.dataprocessclient.force_delete_data_process(ctd_l0_all_data_process_id) # find all associations where this is the subject _, obj_assns = self.rrclient.find_objects(subject=ctd_l0_all_data_process_id, id_only=True) # find all associations where this is the object _, sbj_assns = self.rrclient.find_subjects(object=ctd_l0_all_data_process_id, id_only=True) self.assertEquals(len(obj_assns), 0) self.assertEquals(len(sbj_assns), 0) with self.assertRaises(NotFound): self.rrclient.read(ctd_l0_all_data_process_id) #--------------------------------------------------------------------------------------------------------------- # Force deleting a data process definition #--------------------------------------------------------------------------------------------------------------- self.dataprocessclient.force_delete_data_process_definition(ctd_L0_all_dprocdef_id) # find all associations where this is the subject _, obj_assns = self.rrclient.find_objects(subject=ctd_l0_all_data_process_id, id_only=True) # find all associations where this is the object _, sbj_assns = self.rrclient.find_subjects(object=ctd_l0_all_data_process_id, id_only=True) self.assertEquals(len(obj_assns), 0) self.assertEquals(len(sbj_assns), 0) with self.assertRaises(NotFound): self.rrclient.read(ctd_l0_all_data_process_id)
class TestIntDataProcessManagementService(IonIntegrationTestCase): def setUp(self): # Start container self._start_container() # Establish endpoint with container container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) #print 'got CC client' container_client.start_rel_from_url('res/deploy/r2sa.yml') # Now create client to DataProcessManagementService self.Processclient = DataProcessManagementServiceClient(node=self.container.node) self.RRclient = ResourceRegistryServiceClient(node=self.container.node) self.DAMSclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.DPMSclient = DataProductManagementServiceClient(node=self.container.node) self.PubSubClient = PubsubManagementServiceClient(node=self.container.node) def test_createDataProcess(self): #------------------------------- # Data Process Definition #------------------------------- log.debug("TestIntDataProcessManagementService: create data process definition") dpd_obj = IonObject(RT.DataProcessDefinition, name='data_process_definition', description='some new dpd', module='ion.processes.data.transforms.transform_example', class_name='TransformExample', process_source='some_source_reference') try: dprocdef_id = self.Processclient.create_data_process_definition(dpd_obj) except BadRequest as ex: self.fail("failed to create new data process definition: %s" %ex) # test Data Process Definition creation in rr dprocdef_obj = self.Processclient.read_data_process_definition(dprocdef_id) self.assertEquals(dprocdef_obj.name,'data_process_definition') # Create an input instrument instrument_obj = IonObject(RT.InstrumentDevice, name='Inst1',description='an instrument that is creating the data product') instrument_id, rev = self.RRclient.create(instrument_obj) # Register the instrument so that the data producer and stream object are created data_producer_id = self.DAMSclient.register_instrument(instrument_id) log.debug("TestIntDataProcessManagementService data_producer_id %s" % data_producer_id) #------------------------------- # Input Data Product #------------------------------- log.debug("TestIntDataProcessManagementService: create input data product") input_dp_obj = IonObject(RT.DataProduct, name='InputDataProduct', description='some new dp') try: input_dp_id = self.DPMSclient.create_data_product(input_dp_obj, instrument_id) except BadRequest as ex: self.fail("failed to create new input data product: %s" %ex) # Retrieve the stream via the DataProduct->Stream associations stream_ids, _ = self.RRclient.find_objects(input_dp_id, PRED.hasStream, None, True) log.debug("TestIntDataProcessManagementService: in stream_ids " + str(stream_ids)) self.in_stream_id = stream_ids[0] log.debug("TestIntDataProcessManagementService: Input Stream: " + str( self.in_stream_id)) #------------------------------- # Output Data Product #------------------------------- log.debug("TestIntDataProcessManagementService: create output data product") output_dp_obj = IonObject(RT.DataProduct, name='OutDataProduct',description='transform output') output_dp_id = self.DPMSclient.create_data_product(output_dp_obj) # this will NOT create a stream for the product becuase the data process (source) resource has not been created yet. #------------------------------- # Create the data process #------------------------------- log.debug("TestIntDataProcessManagementService: create_data_process start") try: dproc_id = self.Processclient.create_data_process(dprocdef_id, input_dp_id, output_dp_id) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) self.DAMSclient.assign_data_product(dproc_id, output_dp_id, False) log.debug("TestIntDataProcessManagementService: create_data_process return") #------------------------------- # Producer (Sample Input) #------------------------------- # Create a producing example process # cheat to make a publisher object to send messages in the test. # it is really hokey to pass process=self.cc but it works #stream_route = self.PubSubClient.register_producer(exchange_name='producer_doesnt_have_a_name1', stream_id=self.in_stream_id) #self.ctd_stream1_publisher = StreamPublisher(node=self.container.node, name=('science_data',stream_route.routing_key), process=self.container) pid = self.container.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = self.container.proc_manager.procs[pid] publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=self.container.node) self.ctd_stream1_publisher = publisher_registrar.create_publisher(stream_id=self.in_stream_id) msg = {'num':'3'} self.ctd_stream1_publisher.publish(msg) time.sleep(1) msg = {'num':'5'} self.ctd_stream1_publisher.publish(msg) time.sleep(1) msg = {'num':'9'} self.ctd_stream1_publisher.publish(msg) # See /tmp/transform_output for results..... # clean up the data process try: self.Processclient.delete_data_process(dproc_id) except BadRequest as ex: self.fail("failed to create new data process definition: %s" %ex) with self.assertRaises(NotFound) as e: self.Processclient.read_data_process(dproc_id) try: self.Processclient.delete_data_process_definition(dprocdef_id) except BadRequest as ex: self.fail("failed to create new data process definition: %s" %ex) with self.assertRaises(NotFound) as e: self.Processclient.read_data_process_definition(dprocdef_id)
class TestIntDataProcessManagementServiceMultiOut(IonIntegrationTestCase): def setUp(self): # Start container self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.pubsubclient = PubsubManagementServiceClient(node=self.container.node) self.ingestclient = IngestionManagementServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceClient(node=self.container.node) self.dataproductclient = DataProductManagementServiceClient(node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node) self.datasetclient = DatasetManagementServiceClient(node=self.container.node) # @unittest.skip('not working') def test_createDataProcess(self): #------------------------------- # Data Process Definition #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create data process definition") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all', process_source='some_source_reference') try: dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except BadRequest as ex: self.fail("failed to create new data process definition: %s" %ex) # test Data Process Definition creation in rr dprocdef_obj = self.dataprocessclient.read_data_process_definition(dprocdef_id) self.assertEquals(dprocdef_obj.name,'ctd_L0_all') # Create an input instrument instrument_obj = IonObject(RT.InstrumentDevice, name='Inst1',description='an instrument that is creating the data product') instrument_id, rev = self.rrclient.create(instrument_obj) # Register the instrument so that the data producer and stream object are created data_producer_id = self.damsclient.register_instrument(instrument_id) log.debug("TestIntDataProcessMgmtServiceMultiOut data_producer_id %s" % data_producer_id) # create a stream definition for the data from the ctd simulator ctd_stream_def = ctd_stream_definition() ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data') self.dataprocessclient.assign_input_stream_definition_to_data_process_definition(ctd_stream_def_id, dprocdef_id ) #------------------------------- # Input Data Product #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create input data product") craft = CoverageCraft sdom, tdom = craft.create_domains() sdom = sdom.dump() tdom = tdom.dump() parameter_dictionary = craft.create_parameters() parameter_dictionary = parameter_dictionary.dump() input_dp_obj = IonObject( RT.DataProduct, name='InputDataProduct', description='some new dp', temporal_domain = tdom, spatial_domain = sdom) input_dp_id = self.dataproductclient.create_data_product(input_dp_obj, ctd_stream_def_id, parameter_dictionary) self.damsclient.assign_data_product(instrument_id, input_dp_id) # Retrieve the stream via the DataProduct->Stream associations stream_ids, _ = self.rrclient.find_objects(input_dp_id, PRED.hasStream, None, True) log.debug("TestIntDataProcessMgmtServiceMultiOut: in stream_ids " + str(stream_ids)) self.in_stream_id = stream_ids[0] log.debug("TestIntDataProcessMgmtServiceMultiOut: Input Stream: " + str( self.in_stream_id)) #------------------------------- # Output Data Product #------------------------------- outgoing_stream_conductivity = L0_conductivity_stream_definition() outgoing_stream_conductivity_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_conductivity, name='conductivity') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_conductivity_id, dprocdef_id ) outgoing_stream_pressure = L0_pressure_stream_definition() outgoing_stream_pressure_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_pressure, name='pressure') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_pressure_id, dprocdef_id ) outgoing_stream_temperature = L0_temperature_stream_definition() outgoing_stream_temperature_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_temperature, name='temperature') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_temperature_id, dprocdef_id ) self.output_products={} log.debug("TestIntDataProcessMgmtServiceMultiOut: create output data product conductivity") output_dp_obj = IonObject(RT.DataProduct, name='conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_1 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_conductivity_id, parameter_dictionary) self.output_products['conductivity'] = output_dp_id_1 self.dataproductclient.activate_data_product_persistence(data_product_id=output_dp_id_1) log.debug("TestIntDataProcessMgmtServiceMultiOut: create output data product pressure") output_dp_obj = IonObject(RT.DataProduct, name='pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_2 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_pressure_id, parameter_dictionary) self.output_products['pressure'] = output_dp_id_2 self.dataproductclient.activate_data_product_persistence(data_product_id=output_dp_id_2) log.debug("TestIntDataProcessMgmtServiceMultiOut: create output data product temperature") output_dp_obj = IonObject(RT.DataProduct, name='temperature', description='transform output ', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_3 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_temperature_id, parameter_dictionary) self.output_products['temperature'] = output_dp_id_3 self.dataproductclient.activate_data_product_persistence(data_product_id=output_dp_id_3) #------------------------------- # Create the data process #------------------------------- log.debug("TestIntDataProcessMgmtServiceMultiOut: create_data_process start") try: dproc_id = self.dataprocessclient.create_data_process(dprocdef_id, [input_dp_id], self.output_products) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("TestIntDataProcessMgmtServiceMultiOut: create_data_process return") # these assigns happen inside create_data_process #self.damsclient.assign_data_product(input_resource_id=dproc_id, data_product_id=output_dp_id_1) #self.damsclient.assign_data_product(input_resource_id=dproc_id, data_product_id=output_dp_id_2) #self.damsclient.assign_data_product(input_resource_id=dproc_id, data_product_id=output_dp_id_3) # @unittest.skip('not working.. Fix activate_data_product_persistence()') @patch.dict(CFG, {'endpoint':{'receive':{'timeout': 60}}}) def test_createDataProcessUsingSim(self): #------------------------------- # Create InstrumentModel #------------------------------- instModel_obj = IonObject(RT.InstrumentModel, name='SBE37IMModel', description="SBE37IMModel", model="SBE37IMModel" ) try: instModel_id = self.imsclient.create_instrument_model(instModel_obj) except BadRequest as ex: self.fail("failed to create new InstrumentModel: %s" %ex) print 'test_createDataProcessUsingSim: new InstrumentModel id = ', instModel_id #------------------------------- # Create InstrumentAgent #------------------------------- instAgent_obj = IonObject(RT.InstrumentAgent, name='agent007', description="SBE37IMAgent", driver_module="ion.agents.instrument.instrument_agent", driver_class="InstrumentAgent" ) try: instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj) except BadRequest as ex: self.fail("failed to create new InstrumentAgent: %s" %ex) print 'test_createDataProcessUsingSim: new InstrumentAgent id = ', instAgent_id self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id) #------------------------------- # Create InstrumentDevice #------------------------------- instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDevice', description="SBE37IMDevice", serial_number="12345" ) try: instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj) self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id) except BadRequest as ex: self.fail("failed to create new InstrumentDevice: %s" %ex) print 'test_createDataProcessUsingSim: new InstrumentDevice id = ', instDevice_id #------------------------------- # Create InstrumentAgentInstance to hold configuration information #------------------------------- instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance, name='SBE37IMAgentInstance', description="SBE37IMAgentInstance", svr_addr="localhost", driver_module="mi.instrument.seabird.sbe37smb.ooicore.driver", driver_class="SBE37Driver", cmd_port=5556, evt_port=5557, comms_method="ethernet", comms_device_address=CFG.device.sbe37.host, comms_device_port=CFG.device.sbe37.port, comms_server_address="localhost", comms_server_port=8888) instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj, instAgent_id, instDevice_id) #------------------------------- # Create CTD Parsed as the first data product #------------------------------- # create a stream definition for the data from the ctd simulator ctd_stream_def = SBE37_CDM_stream_definition() ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def) print 'test_createDataProcessUsingSim: new Stream Definition id = ', instDevice_id print 'Creating new CDM data product with a stream definition' craft = CoverageCraft sdom, tdom = craft.create_domains() sdom = sdom.dump() tdom = tdom.dump() parameter_dictionary = craft.create_parameters() parameter_dictionary = parameter_dictionary.dump() dp_obj = IonObject(RT.DataProduct, name='ctd_parsed', description='ctd stream test', temporal_domain = tdom, spatial_domain = sdom) ctd_parsed_data_product = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id, parameter_dictionary) print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_parsed_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product, PRED.hasStream, None, True) print 'test_createDataProcessUsingSim: Data product streams1 = ', stream_ids #------------------------------- # Create CTD Raw as the second data product #------------------------------- print 'test_createDataProcessUsingSim: Creating new RAW data product with a stream definition' raw_stream_def = SBE37_RAW_stream_definition() raw_stream_def_id = self.pubsubclient.create_stream_definition(container=raw_stream_def) dp_obj = IonObject(RT.DataProduct, name='ctd_raw', description='raw stream test', temporal_domain = tdom, spatial_domain = sdom) ctd_raw_data_product = self.dataproductclient.create_data_product(dp_obj, raw_stream_def_id, parameter_dictionary) print 'new ctd_raw_data_product_id = ', ctd_raw_data_product self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_raw_data_product) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_raw_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_raw_data_product, PRED.hasStream, None, True) print 'Data product streams2 = ', stream_ids #------------------------------- # L0 Conductivity - Temperature - Pressure: Data Process Definition #------------------------------- log.debug("test_createDataProcessUsingSim: create data process definition ctd_L0_all") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all', process_source='some_source_reference') try: ctd_L0_all_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except BadRequest as ex: self.fail("failed to create new ctd_L0_all data process definition: %s" %ex) #------------------------------- # L0 Conductivity - Temperature - Pressure: Output Data Products #------------------------------- outgoing_stream_l0_conductivity = L0_conductivity_stream_definition() outgoing_stream_l0_conductivity_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_conductivity, name='L0_Conductivity') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_conductivity_id, ctd_L0_all_dprocdef_id ) outgoing_stream_l0_pressure = L0_pressure_stream_definition() outgoing_stream_l0_pressure_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_pressure, name='L0_Pressure') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_pressure_id, ctd_L0_all_dprocdef_id ) outgoing_stream_l0_temperature = L0_temperature_stream_definition() outgoing_stream_l0_temperature_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_temperature, name='L0_Temperature') self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_temperature_id, ctd_L0_all_dprocdef_id ) self.output_products={} log.debug("test_createDataProcessUsingSim: create output data product L0 conductivity") ctd_l0_conductivity_output_dp_obj = IonObject( RT.DataProduct, name='L0_Conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_conductivity_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_conductivity_output_dp_obj, outgoing_stream_l0_conductivity_id, parameter_dictionary) self.output_products['conductivity'] = ctd_l0_conductivity_output_dp_id self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_conductivity_output_dp_id) log.debug("test_createDataProcessUsingSim: create output data product L0 pressure") ctd_l0_pressure_output_dp_obj = IonObject(RT.DataProduct, name='L0_Pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_pressure_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_pressure_output_dp_obj, outgoing_stream_l0_pressure_id, parameter_dictionary) self.output_products['pressure'] = ctd_l0_pressure_output_dp_id self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_pressure_output_dp_id) log.debug("test_createDataProcessUsingSim: create output data product L0 temperature") ctd_l0_temperature_output_dp_obj = IonObject(RT.DataProduct, name='L0_Temperature', description='transform output temperature', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_temperature_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_temperature_output_dp_obj, outgoing_stream_l0_temperature_id, parameter_dictionary) self.output_products['temperature'] = ctd_l0_temperature_output_dp_id self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_temperature_output_dp_id) #------------------------------- # Create listener for data process events and verify that events are received. #------------------------------- # todo: add this validate for Req: L4-CI-SA-RQ-367 Data processing shall notify registered data product consumers about data processing workflow life cycle events #------------------------------- # L0 Conductivity - Temperature - Pressure: Create the data process #------------------------------- log.debug("test_createDataProcessUsingSim: create data_process start") try: ctd_l0_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L0_all_dprocdef_id, [ctd_parsed_data_product], self.output_products) except BadRequest as ex: self.fail("failed to create new data process: %s" %ex) log.debug("test_createDataProcessUsingSim: data_process created: %s", str(ctd_l0_all_data_process_id)) #------------------------------- # Retrieve a list of all data process defintions in RR and validate that the DPD is listed #------------------------------- # todo: add this validate for Req: L4-CI-SA-RQ-366 Data processing shall manage data topic definitions log.debug("test_createDataProcessUsingSim: activate_data_process ") self.dataprocessclient.activate_data_process(ctd_l0_all_data_process_id) #todo: check that activate event is received L4-CI-SA-RQ-367 # todo: monitor process to se eif it is active (sa-rq-182) # todo: This has not yet been completed by CEI, will prbly surface thru a DPMS call log.debug("test_createDataProcessUsingSim: call CEI interface to monitor ") self.dataproductclient.suspend_data_product_persistence(data_product_id=ctd_raw_data_product) self.dataproductclient.suspend_data_product_persistence(data_product_id=ctd_l0_conductivity_output_dp_id) self.dataproductclient.suspend_data_product_persistence(data_product_id=ctd_l0_pressure_output_dp_id) self.dataproductclient.suspend_data_product_persistence(data_product_id=ctd_l0_temperature_output_dp_id) log.debug("test_createDataProcessUsingSim: deactivate_data_process ") self.dataprocessclient.deactivate_data_process(ctd_l0_all_data_process_id) self.dataprocessclient.delete_data_process(ctd_l0_all_data_process_id)
class TestIntDataProcessManagementServiceMultiOut(IonIntegrationTestCase): def setUp(self): # Start container self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.pubsubclient = PubsubManagementServiceClient(node=self.container.node) self.ingestclient = IngestionManagementServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceClient(node=self.container.node) self.dataproductclient = DataProductManagementServiceClient(node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node) self.datasetclient = DatasetManagementServiceClient(node=self.container.node) self.dataset_management = self.datasetclient self.process_dispatcher = ProcessDispatcherServiceClient(node=self.container.node) def test_createDataProcess(self): #--------------------------------------------------------------------------- # Data Process Definition #--------------------------------------------------------------------------- dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all') dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) # Make assertion on the newly registered data process definition data_process_definition = self.rrclient.read(dprocdef_id) self.assertEquals(data_process_definition.name, 'ctd_L0_all') self.assertEquals(data_process_definition.description, 'transform ctd package into three separate L0 streams') self.assertEquals(data_process_definition.module, 'ion.processes.data.transforms.ctd.ctd_L0_all') self.assertEquals(data_process_definition.class_name, 'ctd_L0_all') # Read the data process definition using data process management and make assertions dprocdef_obj = self.dataprocessclient.read_data_process_definition(dprocdef_id) self.assertEquals(dprocdef_obj.class_name,'ctd_L0_all') self.assertEquals(dprocdef_obj.module,'ion.processes.data.transforms.ctd.ctd_L0_all') #--------------------------------------------------------------------------- # Create an input instrument #--------------------------------------------------------------------------- instrument_obj = IonObject(RT.InstrumentDevice, name='Inst1',description='an instrument that is creating the data product') instrument_id, rev = self.rrclient.create(instrument_obj) # Register the instrument so that the data producer and stream object are created data_producer_id = self.damsclient.register_instrument(instrument_id) # create a stream definition for the data from the ctd simulator pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) ctd_stream_def_id = self.pubsubclient.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_input_stream_definition_to_data_process_definition(ctd_stream_def_id, dprocdef_id ) # Assert that the link between the stream definition and the data process definition was done assocs = self.rrclient.find_associations(subject=dprocdef_id, predicate=PRED.hasInputStreamDefinition, object=ctd_stream_def_id, id_only=True) self.assertIsNotNone(assocs) #--------------------------------------------------------------------------- # Input Data Product #--------------------------------------------------------------------------- tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() input_dp_obj = IonObject( RT.DataProduct, name='InputDataProduct', description='some new dp', temporal_domain = tdom, spatial_domain = sdom) input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=ctd_stream_def_id, exchange_point='test') #Make assertions on the input data product created input_dp_obj = self.rrclient.read(input_dp_id) self.assertEquals(input_dp_obj.name, 'InputDataProduct') self.assertEquals(input_dp_obj.description, 'some new dp') self.damsclient.assign_data_product(instrument_id, input_dp_id) # Retrieve the stream via the DataProduct->Stream associations stream_ids, _ = self.rrclient.find_objects(input_dp_id, PRED.hasStream, None, True) self.in_stream_id = stream_ids[0] #--------------------------------------------------------------------------- # Output Data Product #--------------------------------------------------------------------------- outgoing_stream_conductivity_id = self.pubsubclient.create_stream_definition(name='conductivity', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_conductivity_id, dprocdef_id,binding='conductivity' ) outgoing_stream_pressure_id = self.pubsubclient.create_stream_definition(name='pressure', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_pressure_id, dprocdef_id, binding='pressure' ) outgoing_stream_temperature_id = self.pubsubclient.create_stream_definition(name='temperature', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_temperature_id, dprocdef_id, binding='temperature' ) self.output_products={} output_dp_obj = IonObject(RT.DataProduct, name='conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_1 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_conductivity_id) self.output_products['conductivity'] = output_dp_id_1 output_dp_obj = IonObject(RT.DataProduct, name='pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_2 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_pressure_id) self.output_products['pressure'] = output_dp_id_2 output_dp_obj = IonObject(RT.DataProduct, name='temperature', description='transform output ', temporal_domain = tdom, spatial_domain = sdom) output_dp_id_3 = self.dataproductclient.create_data_product(output_dp_obj, outgoing_stream_temperature_id) self.output_products['temperature'] = output_dp_id_3 #--------------------------------------------------------------------------- # Create the data process #--------------------------------------------------------------------------- def _create_data_process(): dproc_id = self.dataprocessclient.create_data_process(dprocdef_id, [input_dp_id], self.output_products) return dproc_id dproc_id = _create_data_process() # Make assertions on the data process created data_process = self.dataprocessclient.read_data_process(dproc_id) # Assert that the data process has a process id attached self.assertIsNotNone(data_process.process_id) # Assert that the data process got the input data product's subscription id attached as its own input_susbcription_id attribute self.assertIsNotNone(data_process.input_subscription_id) output_data_product_ids = self.rrclient.find_objects(subject=dproc_id, predicate=PRED.hasOutputProduct, object_type=RT.DataProduct, id_only=True) self.assertEquals(Set(output_data_product_ids[0]), Set([output_dp_id_1,output_dp_id_2,output_dp_id_3])) @patch.dict(CFG, {'endpoint':{'receive':{'timeout': 60}}}) def test_createDataProcessUsingSim(self): #------------------------------- # Create InstrumentModel #------------------------------- instModel_obj = IonObject(RT.InstrumentModel, name='SBE37IMModel', description="SBE37IMModel" ) instModel_id = self.imsclient.create_instrument_model(instModel_obj) #------------------------------- # Create InstrumentAgent #------------------------------- instAgent_obj = IonObject(RT.InstrumentAgent, name='agent007', description="SBE37IMAgent", driver_uri="http://sddevrepo.oceanobservatories.org/releases/seabird_sbe37smb_ooicore-0.0.1-py2.7.egg") instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj) self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id) #------------------------------- # Create InstrumentDevice #------------------------------- instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDevice', description="SBE37IMDevice", serial_number="12345" ) instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj) self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id) #------------------------------- # Create InstrumentAgentInstance to hold configuration information #------------------------------- port_agent_config = { 'device_addr': 'sbe37-simulator.oceanobservatories.org', 'device_port': 4001, 'process_type': PortAgentProcessType.UNIX, 'binary_path': "port_agent", 'command_port': 4002, 'data_port': 4003, 'log_level': 5, } port_agent_config = { 'device_addr': CFG.device.sbe37.host, 'device_port': CFG.device.sbe37.port, 'process_type': PortAgentProcessType.UNIX, 'binary_path': "port_agent", 'port_agent_addr': 'localhost', 'command_port': CFG.device.sbe37.port_agent_cmd_port, 'data_port': CFG.device.sbe37.port_agent_data_port, 'log_level': 5, 'type': PortAgentType.ETHERNET } instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance, name='SBE37IMAgentInstance', description="SBE37IMAgentInstance", port_agent_config = port_agent_config) instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj, instAgent_id, instDevice_id) #------------------------------- # Create CTD Parsed as the first data product #------------------------------- # create a stream definition for the data from the ctd simulator pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) ctd_stream_def_id = self.pubsubclient.create_stream_definition(name='SBE32_CDM', parameter_dictionary_id=pdict_id) # Construct temporal and spatial Coordinate Reference System objects tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() dp_obj = IonObject(RT.DataProduct, name='ctd_parsed', description='ctd stream test', temporal_domain = tdom, spatial_domain = sdom) ctd_parsed_data_product = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product, PRED.hasStream, None, True) #------------------------------- # Create CTD Raw as the second data product #------------------------------- raw_stream_def_id = self.pubsubclient.create_stream_definition(name='SBE37_RAW', parameter_dictionary_id=pdict_id) dp_obj.name = 'ctd_raw' ctd_raw_data_product = self.dataproductclient.create_data_product(dp_obj, raw_stream_def_id) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_raw_data_product) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_raw_data_product, PRED.hasStream, None, True) #------------------------------- # L0 Conductivity - Temperature - Pressure: Data Process Definition #------------------------------- dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_L0_all', description='transform ctd package into three separate L0 streams', module='ion.processes.data.transforms.ctd.ctd_L0_all', class_name='ctd_L0_all') ctd_L0_all_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) #------------------------------- # L0 Conductivity - Temperature - Pressure: Output Data Products #------------------------------- outgoing_stream_l0_conductivity_id = self.pubsubclient.create_stream_definition(name='L0_Conductivity', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_conductivity_id, ctd_L0_all_dprocdef_id, binding='conductivity' ) outgoing_stream_l0_pressure_id = self.pubsubclient.create_stream_definition(name='L0_Pressure', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_pressure_id, ctd_L0_all_dprocdef_id, binding='pressure' ) outgoing_stream_l0_temperature_id = self.pubsubclient.create_stream_definition(name='L0_Temperature', parameter_dictionary_id=pdict_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_temperature_id, ctd_L0_all_dprocdef_id, binding='temperature' ) self.output_products={} ctd_l0_conductivity_output_dp_obj = IonObject( RT.DataProduct, name='L0_Conductivity', description='transform output conductivity', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_conductivity_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_conductivity_output_dp_obj, outgoing_stream_l0_conductivity_id) self.output_products['conductivity'] = ctd_l0_conductivity_output_dp_id ctd_l0_pressure_output_dp_obj = IonObject(RT.DataProduct, name='L0_Pressure', description='transform output pressure', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_pressure_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_pressure_output_dp_obj, outgoing_stream_l0_pressure_id) self.output_products['pressure'] = ctd_l0_pressure_output_dp_id ctd_l0_temperature_output_dp_obj = IonObject(RT.DataProduct, name='L0_Temperature', description='transform output temperature', temporal_domain = tdom, spatial_domain = sdom) ctd_l0_temperature_output_dp_id = self.dataproductclient.create_data_product(ctd_l0_temperature_output_dp_obj, outgoing_stream_l0_temperature_id) self.output_products['temperature'] = ctd_l0_temperature_output_dp_id #------------------------------- # Create listener for data process events and verify that events are received. #------------------------------- # todo: add this validate for Req: L4-CI-SA-RQ-367 Data processing shall notify registered data product consumers about data processing workflow life cycle events #todo (contd) ... I believe the capability does not exist yet now. ANS And SA are not yet publishing any workflow life cycle events (Swarbhanu) #------------------------------- # L0 Conductivity - Temperature - Pressure: Create the data process #------------------------------- ctd_l0_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L0_all_dprocdef_id, [ctd_parsed_data_product], self.output_products) data_process = self.rrclient.read(ctd_l0_all_data_process_id) process_id = data_process.process_id self.addCleanup(self.process_dispatcher.cancel_process, process_id) #------------------------------- # Wait until the process launched in the create_data_process() method is actually running, before proceeding further in this test #------------------------------- gate = ProcessStateGate(self.process_dispatcher.read_process, process_id, ProcessStateEnum.RUNNING) self.assertTrue(gate.await(30), "The data process (%s) did not spawn in 30 seconds" % process_id) #------------------------------- # Retrieve a list of all data process defintions in RR and validate that the DPD is listed #------------------------------- # todo: Req: L4-CI-SA-RQ-366 Data processing shall manage data topic definitions # todo: data topics are being handled by pub sub at the level of streams self.dataprocessclient.activate_data_process(ctd_l0_all_data_process_id) #todo: check that activate event is received L4-CI-SA-RQ-367 #todo... (it looks like no event is being published when the data process is activated... so below, we just check for now # todo... that the subscription is indeed activated) (Swarbhanu) # todo: monitor process to see if it is active (sa-rq-182) ctd_l0_all_data_process = self.rrclient.read(ctd_l0_all_data_process_id) input_subscription_id = ctd_l0_all_data_process.input_subscription_id subs = self.rrclient.read(input_subscription_id) self.assertTrue(subs.activated) # todo: This has not yet been completed by CEI, will prbly surface thru a DPMS call self.dataprocessclient.deactivate_data_process(ctd_l0_all_data_process_id) #------------------------------- # Retrieve the extended resources for data process definition and for data process #------------------------------- extended_process_definition = self.dataprocessclient.get_data_process_definition_extension(ctd_L0_all_dprocdef_id) self.assertEqual(1, len(extended_process_definition.data_processes)) log.debug("test_createDataProcess: extended_process_definition %s", str(extended_process_definition)) extended_process = self.dataprocessclient.get_data_process_extension(ctd_l0_all_data_process_id) self.assertEqual(1, len(extended_process.input_data_products)) log.debug("test_createDataProcess: extended_process %s", str(extended_process)) ################################ Test the removal of data processes ################################## #------------------------------------------------------------------- # Test the deleting of the data process #------------------------------------------------------------------- # Before deleting, get the input streams, output streams and the subscriptions so that they can be checked after deleting # dp_obj_1 = self.rrclient.read(ctd_l0_all_data_process_id) # input_subscription_id = dp_obj_1.input_subscription_id # out_prods, _ = self.rrclient.find_objects(subject=ctd_l0_all_data_process_id, predicate=PRED.hasOutputProduct, id_only=True) # in_prods, _ = self.rrclient.find_objects(ctd_l0_all_data_process_id, PRED.hasInputProduct, id_only=True) # in_streams = [] # for in_prod in in_prods: # streams, _ = self.rrclient.find_objects(in_prod, PRED.hasStream, id_only=True) # in_streams.extend(streams) # out_streams = [] # for out_prod in out_prods: # streams, _ = self.rrclient.find_objects(out_prod, PRED.hasStream, id_only=True) # out_streams.extend(streams) # Deleting the data process self.dataprocessclient.delete_data_process(ctd_l0_all_data_process_id) # Check that the data process got removed. Check the lcs state. It should be retired dp_obj = self.rrclient.read(ctd_l0_all_data_process_id) self.assertEquals(dp_obj.lcstate, LCS.RETIRED) # Check for process defs still attached to the data process dpd_assn_ids = self.rrclient.find_associations(subject=ctd_l0_all_data_process_id, predicate=PRED.hasProcessDefinition, id_only=True) self.assertEquals(len(dpd_assn_ids), 0) # Check for output data product still attached to the data process out_products, assocs = self.rrclient.find_objects(subject=ctd_l0_all_data_process_id, predicate=PRED.hasOutputProduct, id_only=True) self.assertEquals(len(out_products), 0) self.assertEquals(len(assocs), 0) # Check for input data products still attached to the data process inprod_associations = self.rrclient.find_associations(ctd_l0_all_data_process_id, PRED.hasInputProduct) self.assertEquals(len(inprod_associations), 0) # Check for input data products still attached to the data process inprod_associations = self.rrclient.find_associations(ctd_l0_all_data_process_id, PRED.hasInputProduct) self.assertEquals(len(inprod_associations), 0) # Check of the data process has been deactivated self.assertIsNone(dp_obj.input_subscription_id) # Read the original subscription id of the data process and check that it has been deactivated with self.assertRaises(NotFound): self.pubsubclient.read_subscription(input_subscription_id) #------------------------------------------------------------------- # Delete the data process definition #------------------------------------------------------------------- # before deleting, get the process definition being associated to in order to be able to check later if the latter gets deleted as it should proc_def_ids, proc_def_asocs = self.rrclient.find_objects(ctd_l0_all_data_process_id, PRED.hasProcessDefinition) self.dataprocessclient.delete_data_process_definition(ctd_L0_all_dprocdef_id) # check that the data process definition has been retired dp_proc_def = self.rrclient.read(ctd_L0_all_dprocdef_id) self.assertEquals(dp_proc_def.lcstate, LCS.RETIRED) # Check for old associations of this data process definition proc_defs, proc_def_asocs = self.rrclient.find_objects(ctd_L0_all_dprocdef_id, PRED.hasProcessDefinition) self.assertEquals(len(proc_defs), 0) # find all associations where this is the subject _, obj_assns = self.rrclient.find_objects(subject= ctd_L0_all_dprocdef_id, id_only=True) self.assertEquals(len(obj_assns), 0) ################################ Test the removal of data processes ################################## # Try force delete... This should simply delete the associations and the data process object # from the resource registry #--------------------------------------------------------------------------------------------------------------- # Force deleting a data process #--------------------------------------------------------------------------------------------------------------- self.dataprocessclient.force_delete_data_process(ctd_l0_all_data_process_id) # find all associations where this is the subject _, obj_assns = self.rrclient.find_objects(subject=ctd_l0_all_data_process_id, id_only=True) # find all associations where this is the object _, sbj_assns = self.rrclient.find_subjects(object=ctd_l0_all_data_process_id, id_only=True) self.assertEquals(len(obj_assns), 0) self.assertEquals(len(sbj_assns), 0) with self.assertRaises(NotFound): self.rrclient.read(ctd_l0_all_data_process_id) #--------------------------------------------------------------------------------------------------------------- # Force deleting a data process definition #--------------------------------------------------------------------------------------------------------------- self.dataprocessclient.force_delete_data_process_definition(ctd_L0_all_dprocdef_id) # find all associations where this is the subject _, obj_assns = self.rrclient.find_objects(subject=ctd_l0_all_data_process_id, id_only=True) # find all associations where this is the object _, sbj_assns = self.rrclient.find_subjects(object=ctd_l0_all_data_process_id, id_only=True) self.assertEquals(len(obj_assns), 0) self.assertEquals(len(sbj_assns), 0) with self.assertRaises(NotFound): self.rrclient.read(ctd_l0_all_data_process_id) def test_transform_function_crd(self): tf = TransformFunction(name='simple', module='pyon.ion.process', cls='SimpleProcess') tf_id = self.dataprocessclient.create_transform_function(tf) self.assertTrue(tf_id) tf2_id = self.dataprocessclient.create_transform_function(tf) self.assertEquals(tf_id, tf2_id) tf_obj = self.dataprocessclient.read_transform_function(tf_id) self.assertEquals([tf.name, tf.module, tf.cls, tf.function_type], [tf_obj.name, tf_obj.module, tf_obj.cls, tf_obj.function_type]) tf.module = 'dev.null' self.assertRaises(BadRequest, self.dataprocessclient.create_transform_function, tf) self.dataprocessclient.delete_transform_function(tf_id) self.assertRaises(NotFound, self.dataprocessclient.read_transform_function, tf_id)