def on_start(self): #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG self.pubsub_management = PubsubManagementServiceProcessClient(process=self) self.ssclient = SchedulerServiceProcessClient(process=self) self.rrclient = ResourceRegistryServiceProcessClient(process=self) self.data_retriever_client = DataRetrieverServiceProcessClient(process=self) self.dsm_client = DatasetManagementServiceProcessClient(process=self) self.pubsub_client = PubsubManagementServiceProcessClient(process = self) self.stream_info = self.CFG.get_safe('process.publish_streams',{}) self.stream_names = self.stream_info.keys() self.stream_ids = self.stream_info.values() if not self.stream_names: raise BadRequest('MPL Transform has no output streams.') graph_time_periods= self.CFG.get_safe('graph_time_periods') # If this is meant to be an event driven process, schedule an event to be generated every few minutes/hours self.event_timer_interval = self.CFG.get_safe('graph_gen_interval') if self.event_timer_interval: event_origin = "Interval_Timer_Matplotlib" sub = EventSubscriber(event_type="ResourceEvent", callback=self.interval_timer_callback, origin=event_origin) sub.start() self.interval_timer_id = self.ssclient.create_interval_timer(start_time="now" , interval=self._str_to_secs(self.event_timer_interval), event_origin=event_origin, event_subtype="") super(VizTransformMatplotlibGraphs,self).on_start()
def setUp(self): # Start container logging.disable(logging.ERROR) self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # simulate preloading preload_ion_params(self.container) logging.disable(logging.NOTSET) #Instantiate a process to represent the test process=VisualizationServiceTestProcess() # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceProcessClient(node=self.container.node, process=process) self.damsclient = DataAcquisitionManagementServiceProcessClient(node=self.container.node, process=process) self.pubsubclient = PubsubManagementServiceProcessClient(node=self.container.node, process=process) self.ingestclient = IngestionManagementServiceProcessClient(node=self.container.node, process=process) self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process=process) self.dataproductclient = DataProductManagementServiceProcessClient(node=self.container.node, process=process) self.dataprocessclient = DataProcessManagementServiceProcessClient(node=self.container.node, process=process) self.datasetclient = DatasetManagementServiceProcessClient(node=self.container.node, process=process) self.workflowclient = WorkflowManagementServiceProcessClient(node=self.container.node, process=process) self.process_dispatcher = ProcessDispatcherServiceProcessClient(node=self.container.node, process=process) self.data_retriever = DataRetrieverServiceProcessClient(node=self.container.node, process=process) self.vis_client = VisualizationServiceProcessClient(node=self.container.node, process=process) self.ctd_stream_def = SBE37_CDM_stream_definition()
def setUp(self): # Start container self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # simulate preloading preload_ion_params(self.container) #Instantiate a process to represent the test process=WorkflowServiceTestProcess() # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceProcessClient(node=self.container.node, process=process) self.damsclient = DataAcquisitionManagementServiceProcessClient(node=self.container.node, process=process) self.pubsubclient = PubsubManagementServiceProcessClient(node=self.container.node, process=process) self.ingestclient = IngestionManagementServiceProcessClient(node=self.container.node, process=process) self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process=process) self.dataproductclient = DataProductManagementServiceProcessClient(node=self.container.node, process=process) self.dataprocessclient = DataProcessManagementServiceProcessClient(node=self.container.node, process=process) self.datasetclient = DatasetManagementServiceProcessClient(node=self.container.node, process=process) self.workflowclient = WorkflowManagementServiceProcessClient(node=self.container.node, process=process) self.process_dispatcher = ProcessDispatcherServiceProcessClient(node=self.container.node, process=process) self.data_retriever = DataRetrieverServiceProcessClient(node=self.container.node, process=process) self.ctd_stream_def = SBE37_CDM_stream_definition()
def on_start(self): SimpleProcess.on_start(self) self.data_retriever = DataRetrieverServiceProcessClient(process=self) self.interval_key = self.CFG.get_safe('process.interval_key', None) self.qc_params = self.CFG.get_safe('process.qc_params', []) validate_is_not_none( self.interval_key, 'An interval key is necessary to launch this process') self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True) self.add_endpoint(self.event_subscriber) self.resource_registry = self.container.resource_registry self.run_interval = self.CFG.get_safe( 'service.qc_processing.run_interval', 24)
def on_start(self): SimpleProcess.on_start(self) self.data_retriever = DataRetrieverServiceProcessClient(process=self) self.interval_key = self.CFG.get_safe('process.interval_key',None) self.qc_params = self.CFG.get_safe('process.qc_params',[]) validate_is_not_none(self.interval_key, 'An interval key is necessary to launch this process') self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True) self.add_endpoint(self.event_subscriber) self.resource_registry = self.container.resource_registry
def on_start(self): #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG self.pubsub_management = PubsubManagementServiceProcessClient( process=self) self.ssclient = SchedulerServiceProcessClient(process=self) self.rrclient = ResourceRegistryServiceProcessClient(process=self) self.data_retriever_client = DataRetrieverServiceProcessClient( process=self) self.dsm_client = DatasetManagementServiceProcessClient(process=self) self.pubsub_client = PubsubManagementServiceProcessClient(process=self) self.stream_info = self.CFG.get_safe('process.publish_streams', {}) self.stream_names = self.stream_info.keys() self.stream_ids = self.stream_info.values() if not self.stream_names: raise BadRequest('MPL Transform has no output streams.') graph_time_periods = self.CFG.get_safe('graph_time_periods') # If this is meant to be an event driven process, schedule an event to be generated every few minutes/hours self.event_timer_interval = self.CFG.get_safe('graph_gen_interval') if self.event_timer_interval: event_origin = "Interval_Timer_Matplotlib" sub = EventSubscriber(event_type="ResourceEvent", callback=self.interval_timer_callback, origin=event_origin) sub.start() self.interval_timer_id = self.ssclient.create_interval_timer( start_time="now", interval=self._str_to_secs(self.event_timer_interval), event_origin=event_origin, event_subtype="") super(VizTransformMatplotlibGraphs, self).on_start()
class TestWorkflowManagementIntegration(VisualizationIntegrationTestHelper): def setUp(self): # Start container self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') #Instantiate a process to represent the test process=WorkflowServiceTestProcess() # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceProcessClient(node=self.container.node, process=process) self.damsclient = DataAcquisitionManagementServiceProcessClient(node=self.container.node, process=process) self.pubsubclient = PubsubManagementServiceProcessClient(node=self.container.node, process=process) self.ingestclient = IngestionManagementServiceProcessClient(node=self.container.node, process=process) self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process=process) self.dataproductclient = DataProductManagementServiceProcessClient(node=self.container.node, process=process) self.dataprocessclient = DataProcessManagementServiceProcessClient(node=self.container.node, process=process) self.datasetclient = DatasetManagementServiceProcessClient(node=self.container.node, process=process) self.workflowclient = WorkflowManagementServiceProcessClient(node=self.container.node, process=process) self.process_dispatcher = ProcessDispatcherServiceProcessClient(node=self.container.node, process=process) self.data_retriever = DataRetrieverServiceProcessClient(node=self.container.node, process=process) self.ctd_stream_def = SBE37_CDM_stream_definition() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI') def test_SA_transform_components(self): assertions = self.assertTrue #The list of data product streams to monitor data_product_stream_ids = list() #Create the input data product ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product() data_product_stream_ids.append(ctd_stream_id) ### ### Setup the first transformation ### # Salinity: Data Process Definition ctd_L2_salinity_dprocdef_id = self.create_salinity_data_process_definition() l2_salinity_all_data_process_id, ctd_l2_salinity_output_dp_id = self.create_transform_process(ctd_L2_salinity_dprocdef_id,ctd_parsed_data_product_id, 'salinity' ) ## get the stream id for the transform outputs stream_ids, _ = self.rrclient.find_objects(ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) > 0 ) sal_stream_id = stream_ids[0] data_product_stream_ids.append(sal_stream_id) ### ### Setup the second transformation ### # Salinity Doubler: Data Process Definition salinity_doubler_dprocdef_id = self.create_salinity_doubler_data_process_definition() salinity_double_data_process_id, salinity_doubler_output_dp_id = self.create_transform_process(salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id, 'salinity' ) stream_ids, _ = self.rrclient.find_objects(salinity_doubler_output_dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) > 0 ) sal_dbl_stream_id = stream_ids[0] data_product_stream_ids.append(sal_dbl_stream_id) #Start the output stream listener to monitor and collect messages results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids) #Stop the transform processes self.dataprocessclient.deactivate_data_process(salinity_double_data_process_id) self.dataprocessclient.deactivate_data_process(l2_salinity_all_data_process_id) #Validate the data from each of the messages along the way self.validate_messages(results) @attr('LOCOINT') @attr('SMOKE') 
@unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI') def test_transform_workflow(self): assertions = self.assertTrue log.debug("Building the workflow definition") workflow_def_obj = IonObject(RT.WorkflowDefinition, name='Salinity_Test_Workflow', description='tests a workflow of multiple transform data processes') workflow_data_product_name = 'TEST-Workflow_Output_Product' #Set a specific output product name #------------------------------------------------------------------------------------------------------------------------- log.debug( "Adding a transformation process definition for salinity") #------------------------------------------------------------------------------------------------------------------------- ctd_L2_salinity_dprocdef_id = self.create_salinity_data_process_definition() workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=ctd_L2_salinity_dprocdef_id, persist_process_output_data=False) #Don't persist the intermediate data product workflow_def_obj.workflow_steps.append(workflow_step_obj) #------------------------------------------------------------------------------------------------------------------------- log.debug( "Adding a transformation process definition for salinity doubler") #------------------------------------------------------------------------------------------------------------------------- salinity_doubler_dprocdef_id = self.create_salinity_doubler_data_process_definition() workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=salinity_doubler_dprocdef_id, ) workflow_def_obj.workflow_steps.append(workflow_step_obj) log.debug( "Creating workflow def in the resource registry") workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj) aids = self.rrclient.find_associations(workflow_def_id, PRED.hasDataProcessDefinition) assertions(len(aids) == 2 ) #The list of data product streams to monitor data_product_stream_ids = list() log.debug( "Creating the input data product") ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product() data_product_stream_ids.append(ctd_stream_id) log.debug( "Creating and starting the workflow") workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id, persist_workflow_data_product=True, output_data_product_name=workflow_data_product_name, timeout=300) workflow_output_ids,_ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True) assertions(len(workflow_output_ids) == 1 ) log.debug( "persisting the output product") #self.dataproductclient.activate_data_product_persistence(workflow_product_id) dataset_ids,_ = self.rrclient.find_objects(workflow_product_id, PRED.hasDataset, RT.Dataset, True) assertions(len(dataset_ids) == 1 ) dataset_id = dataset_ids[0] log.debug( "Verifying the output data product name matches what was specified in the workflow definition") workflow_product = self.rrclient.read(workflow_product_id) assertions(workflow_product.name.startswith(workflow_data_product_name), 'Nope: %s != %s' % (workflow_product.name, workflow_data_product_name)) log.debug( "Walking the associations to find the appropriate output data streams to validate the messages") workflow_dp_ids,_ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True) assertions(len(workflow_dp_ids) == 2 ) for dp_id in workflow_dp_ids: stream_ids, _ = self.rrclient.find_objects(dp_id, 
PRED.hasStream, None, True) assertions(len(stream_ids) == 1 ) data_product_stream_ids.append(stream_ids[0]) log.debug( "data_product_stream_ids: %s" % data_product_stream_ids) log.debug( "Starting the output stream listener to monitor to collect messages") results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids) log.debug( "results::: %s" % results) log.debug( "Stopping the workflow processes") self.workflowclient.terminate_data_process_workflow(workflow_id, False, timeout=250) # Should test true at some point log.debug( "Making sure the Workflow object was removed") objs, _ = self.rrclient.find_resources(restype=RT.Workflow) assertions(len(objs) == 0) log.debug( "Validating the data from each of the messages along the way") self.validate_messages(results) log.debug( "Checking to see if dataset id = %s, was persisted, and that it can be retrieved...." % dataset_id) self.validate_data_ingest_retrieve(dataset_id) log.debug( "Cleaning up to make sure delete is correct.") self.workflowclient.delete_workflow_definition(workflow_def_id) workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition) assertions(len(workflow_def_ids) == 0 ) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI') def test_google_dt_transform_workflow(self): assertions = self.assertTrue # Build the workflow definition workflow_def_obj = IonObject(RT.WorkflowDefinition, name='GoogleDT_Test_Workflow',description='Tests the workflow of converting stream data to Google DT') #Add a transformation process definition google_dt_procdef_id = self.create_google_dt_data_process_definition() workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=google_dt_procdef_id, persist_process_output_data=False) workflow_def_obj.workflow_steps.append(workflow_step_obj) #Create it in the resource registry workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj) #The list of data product streams to monitor data_product_stream_ids = list() #Create the input data product ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product() data_product_stream_ids.append(ctd_stream_id) #Create and start the workflow workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id, timeout=60) workflow_output_ids,_ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True) assertions(len(workflow_output_ids) == 1 ) #Walk the associations to find the appropriate output data streams to validate the messages workflow_dp_ids,_ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True) assertions(len(workflow_dp_ids) == 1 ) for dp_id in workflow_dp_ids: stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) == 1 ) data_product_stream_ids.append(stream_ids[0]) #Start the output stream listener to monitor and collect messages results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids) #Stop the workflow processes self.workflowclient.terminate_data_process_workflow(workflow_id=workflow_id,delete_data_products=False, timeout=60) # Should test true at some point #Validate the data from each of the messages along the way self.validate_google_dt_transform_results(results) """ # Check to see if ingestion worked. Extract the granules from data_retrieval. 
# First find the dataset associated with the output dp product ds_ids,_ = self.rrclient.find_objects(workflow_dp_ids[len(workflow_dp_ids) - 1], PRED.hasDataset, RT.Dataset, True) retrieved_granule = self.data_retriever.retrieve(ds_ids[0]) #Validate the data from each of the messages along the way self.validate_google_dt_transform_results(retrieved_granule) """ #Cleanup to make sure delete is correct. self.workflowclient.delete_workflow_definition(workflow_def_id) workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition) assertions(len(workflow_def_ids) == 0 ) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI') def test_mpl_graphs_transform_workflow(self): assertions = self.assertTrue # Build the workflow definition workflow_def_obj = IonObject(RT.WorkflowDefinition, name='Mpl_Graphs_Test_Workflow',description='Tests the workflow of converting stream data to Matplotlib graphs') #Add a transformation process definition mpl_graphs_procdef_id = self.create_mpl_graphs_data_process_definition() workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=mpl_graphs_procdef_id, persist_process_output_data=False) workflow_def_obj.workflow_steps.append(workflow_step_obj) #Create it in the resource registry workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj) #The list of data product streams to monitor data_product_stream_ids = list() #Create the input data product ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product() data_product_stream_ids.append(ctd_stream_id) #Create and start the workflow workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id, persist_workflow_data_product=True, timeout=60) workflow_output_ids,_ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True) assertions(len(workflow_output_ids) == 1 ) #Walk the associations to find the appropriate output data streams to validate the messages workflow_dp_ids,_ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True) assertions(len(workflow_dp_ids) == 1 ) for dp_id in workflow_dp_ids: stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) == 1 ) data_product_stream_ids.append(stream_ids[0]) #Start the output stream listener to monitor and collect messages results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids) #Stop the workflow processes self.workflowclient.terminate_data_process_workflow(workflow_id=workflow_id,delete_data_products=False, timeout=60) # Should test true at some point #Validate the data from each of the messages along the way self.validate_mpl_graphs_transform_results(results) # Check to see if ingestion worked. Extract the granules from data_retrieval. # First find the dataset associated with the output dp product ds_ids,_ = self.rrclient.find_objects(workflow_dp_ids[len(workflow_dp_ids) - 1], PRED.hasDataset, RT.Dataset, True) retrieved_granule = self.data_retriever.retrieve_last_data_points(ds_ids[0], 10) #Validate the data from each of the messages along the way self.validate_mpl_graphs_transform_results(retrieved_granule) #Cleanup to make sure delete is correct. 
self.workflowclient.delete_workflow_definition(workflow_def_id) workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition) assertions(len(workflow_def_ids) == 0 ) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI') def test_multiple_workflow_instances(self): assertions = self.assertTrue # Build the workflow definition workflow_def_obj = IonObject(RT.WorkflowDefinition, name='Multiple_Test_Workflow',description='Tests the workflow of converting stream data') #Add a transformation process definition google_dt_procdef_id = self.create_google_dt_data_process_definition() workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=google_dt_procdef_id, persist_process_output_data=False) workflow_def_obj.workflow_steps.append(workflow_step_obj) #Create it in the resource registry workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj) #The list of data product streams to monitor data_product_stream_ids = list() #Create the first input data product ctd_stream_id1, ctd_parsed_data_product_id1 = self.create_ctd_input_stream_and_data_product('ctd_parsed1') data_product_stream_ids.append(ctd_stream_id1) #Create and start the first workflow workflow_id1, workflow_product_id1 = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id1, timeout=60) #Create the second input data product ctd_stream_id2, ctd_parsed_data_product_id2 = self.create_ctd_input_stream_and_data_product('ctd_parsed2') data_product_stream_ids.append(ctd_stream_id2) #Create and start the second workflow workflow_id2, workflow_product_id2 = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id2, timeout=60) #Walk the associations to find the appropriate output data streams to validate the messages workflow_ids,_ = self.rrclient.find_resources(restype=RT.Workflow) assertions(len(workflow_ids) == 2 ) #Start the first input stream process ctd_sim_pid1 = self.start_sinusoidal_input_stream_process(ctd_stream_id1) #Start the second input stream process ctd_sim_pid2 = self.start_simple_input_stream_process(ctd_stream_id2) #Start the output stream listener to monitor a set number of messages being sent through the workflows results = self.start_output_stream_and_listen(None, data_product_stream_ids, message_count_per_stream=5) # stop the flow of messages... self.process_dispatcher.cancel_process(ctd_sim_pid1) # kill the ctd simulator process - that is enough data self.process_dispatcher.cancel_process(ctd_sim_pid2) #Stop the first workflow processes self.workflowclient.terminate_data_process_workflow(workflow_id=workflow_id1,delete_data_products=False, timeout=60) # Should test true at some point #Stop the second workflow processes self.workflowclient.terminate_data_process_workflow(workflow_id=workflow_id2,delete_data_products=False, timeout=60) # Should test true at some point workflow_ids,_ = self.rrclient.find_resources(restype=RT.Workflow) assertions(len(workflow_ids) == 0 ) #Cleanup to make sure delete is correct. self.workflowclient.delete_workflow_definition(workflow_def_id) workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition) assertions(len(workflow_def_ids) == 0 ) aid_list = self.rrclient.find_associations(workflow_def_id, PRED.hasDataProcessDefinition) assertions(len(aid_list) == 0 )
class QCPostProcessing(SimpleProcess): ''' QC Post Processing Process This process provides ION clients and operators the capability to evaluate the automated quality control flags on various data products. This process should be run periodically with overlapping spans of data to ensure complete dataset QC verification. The parameters that this process accepts as configuration are: - dataset_id: The dataset identifier, required. - start_time: Unix timestamp, defaults to 24 hours in the past - end_time: Unix timestamp, defaults to the current time - qc_params: a list of qc functions to evaluate, currently supported functions are: ['glblrng_qc', 'spketst_qc', 'stuckvl_qc'], defaults to all ''' qc_suffixes = ['glblrng_qc', 'spketst_qc', 'stuckvl_qc'] def on_start(self): SimpleProcess.on_start(self) self.data_retriever = DataRetrieverServiceProcessClient(process=self) self.interval_key = self.CFG.get_safe('process.interval_key',None) self.qc_params = self.CFG.get_safe('process.qc_params',[]) validate_is_not_none(self.interval_key, 'An interval key is necessary to launch this process') self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True) self.add_endpoint(self.event_subscriber) self.resource_registry = self.container.resource_registry self.run_interval = self.CFG.get_safe('service.qc_processing.run_interval', 24) def _event_callback(self, *args, **kwargs): log.info('QC Post Processing Triggered') dataset_ids, _ = self.resource_registry.find_resources(restype=RT.Dataset, id_only=True) for dataset_id in dataset_ids: log.info('QC Post Processing for dataset %s', dataset_id) try: self.process(dataset_id) except BadRequest as e: if 'Problems reading from the coverage' in e.message: log.error('Failed to read from dataset %s', dataset_id, exc_info=True) def process(self, dataset_id, start_time=0, end_time=0): if not dataset_id: raise BadRequest('No dataset id specified.') now = time.time() start_time = start_time or (now - (3600*(self.run_interval+1))) # Every N hours with 1 hour of overlap end_time = end_time or now qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent) log.debug('Iterating over the data blocks') for st,et in self.chop(int(start_time),int(end_time)): log.debug('Chopping %s:%s', st, et) log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s})", dataset_id, st, et) try: granule = self.data_retriever.retrieve(dataset_id, query={'start_time':st, 'end_time':et}) except BadRequest: data_products, _ = self.container.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct) for data_product in data_products: log.exception('Failed to perform QC Post Processing on %s', data_product.name) log.error('Calculated Start Time: %s', st) log.error('Calculated End Time: %s', et) raise log.debug('Retrieved Data') rdt = RecordDictionaryTool.load_from_granule(granule) qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])] log.debug('QC Fields: %s', qc_fields) for field in qc_fields: val = rdt[field] if val is None: continue if not np.all(val): log.debug('Found QC Alerts') indexes = np.where(val==0) timestamps = rdt[rdt.temporal_parameter][indexes[0]] self.flag_qc_parameter(dataset_id, field, timestamps.tolist(),{}) def flag_qc_parameter(self, dataset_id, parameter, temporal_values, configuration): log.info('Flagging
QC for %s', parameter) data_product_ids, _ = self.resource_registry.find_subjects(object=dataset_id, subject_type=RT.DataProduct, predicate=PRED.hasDataset, id_only=True) for data_product_id in data_product_ids: self.qc_publisher.publish_event(origin=data_product_id, qc_parameter=parameter, temporal_values=temporal_values, configuration=configuration) @classmethod def chop(cls, start_time, end_time): while start_time < end_time: yield (start_time, min(start_time+3600, end_time)) start_time = min(start_time+3600, end_time) return
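# --- Illustrative configuration sketch for QCPostProcessing (not part of the original source) ---
# A minimal sketch of the process configuration read by on_start() above via CFG.get_safe.
# The key names come directly from the code; the concrete values (interval key string,
# qc_params subset, run interval) are assumptions for illustration only.
example_qc_post_processing_config = {
    'process': {
        'interval_key': 'qc_post_processing_interval',  # required; origin of the TimerEvent subscription
        'qc_params': ['glblrng_qc', 'spketst_qc'],       # optional subset of qc_suffixes; defaults to all
    },
    'service': {
        'qc_processing': {
            'run_interval': 24,  # hours of data evaluated per run (plus 1 hour of overlap)
        },
    },
}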
class VizTransformMatplotlibGraphs(TransformStreamPublisher, TransformEventListener, TransformStreamListener): """ This class is used for instantiating worker processes that have subscriptions to data streams and convert incoming data from CDM format to Matplotlib graphs """ output_bindings = ['graph_image_param_dict'] event_timer_interval = None def on_start(self): #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG self.pubsub_management = PubsubManagementServiceProcessClient(process=self) self.ssclient = SchedulerServiceProcessClient(process=self) self.rrclient = ResourceRegistryServiceProcessClient(process=self) self.data_retriever_client = DataRetrieverServiceProcessClient(process=self) self.dsm_client = DatasetManagementServiceProcessClient(process=self) self.pubsub_client = PubsubManagementServiceProcessClient(process = self) self.stream_info = self.CFG.get_safe('process.publish_streams',{}) self.stream_names = self.stream_info.keys() self.stream_ids = self.stream_info.values() if not self.stream_names: raise BadRequest('MPL Transform has no output streams.') graph_time_periods= self.CFG.get_safe('graph_time_periods') # If this is meant to be an event driven process, schedule an event to be generated every few minutes/hours self.event_timer_interval = self.CFG.get_safe('graph_gen_interval') if self.event_timer_interval: event_origin = "Interval_Timer_Matplotlib" sub = EventSubscriber(event_type="ResourceEvent", callback=self.interval_timer_callback, origin=event_origin) sub.start() self.interval_timer_id = self.ssclient.create_interval_timer(start_time="now" , interval=self._str_to_secs(self.event_timer_interval), event_origin=event_origin, event_subtype="") super(VizTransformMatplotlibGraphs,self).on_start() # when tranform is used as a data process def recv_packet(self, packet, in_stream_route, in_stream_id): #Check to see if the class instance was set up as a event triggered transform. If yes, skip the packet if self.event_timer_interval: return log.info('Received packet') mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(packet, params=self.get_stream_definition()) for stream_name in self.stream_names: publisher = getattr(self, stream_name) publisher.publish(mpl_data_granule) def get_stream_definition(self): stream_id = self.stream_ids[0] stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id) return stream_def._id def process_event(self, msg, headers): return def interval_timer_callback(self, *args, **kwargs): #Find out the input data product to this process in_dp_id = self.CFG.get_safe('in_dp_id') print " >>>>>>>>>>>>>> IN DP ID from cfg : ", in_dp_id # get the dataset_id associated with the data_product. Need it to do the data retrieval ds_ids,_ = self.rrclient.find_objects(in_dp_id, PRED.hasDataset, RT.Dataset, True) if ds_ids is None or not ds_ids: return None # retrieve data for the specified time interval. 
Set up the query from the passed config first query = {} param_list_str = self.CFG.get_safe('parameters') if param_list_str: query['parameters'] = param_list_str.split(', ') # append time if not present in list of parameters if 'time' not in query['parameters']: query['parameters'].append('time') query['start_time'] = query['end_time'] = ntplib.system_to_ntp_time(time.time()) # Now query['stride_time'] = 1 if self.CFG.get_safe('graph_time_period'): query['start_time'] = query['end_time'] - self._str_to_secs(self.CFG.get_safe('graph_time_period')) #print " >>>>>>>>>>>>>> QUERY = ", query #retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0],{'start_time':start_time,'end_time':end_time}) retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0], query=query) # add extra parameters to query passed in config that are not needed by data retrieval if self.CFG.get_safe('resolution'): query['resolution'] = self.CFG.get_safe('resolution') # send the granule through the Algorithm code to get the matplotlib graphs mpl_pdict_id = self.dsm_client.read_parameter_dictionary_by_name('graph_image_param_dict',id_only=True) mpl_stream_def = self.pubsub_client.create_stream_definition('mpl', parameter_dictionary_id=mpl_pdict_id) fileName = self.CFG.get_safe('graph_time_period') mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(retrieved_granule, config=query, params=mpl_stream_def, fileName=fileName) if mpl_data_granule is None: return None # publish on all specified output streams for stream_name in self.stream_names: publisher = getattr(self, stream_name) publisher.publish(mpl_data_granule) return def _str_to_secs(self, time_period): # this method converts commonly used time periods to their equivalent in seconds #separate alpha and numeric parts of the time period time_n = time_period.lower().rstrip('abcdefghijklmnopqrstuvwxyz ') time_a = time_period.lower().lstrip('0123456789. ') # determine if the user specified secs, mins, hours, days, weeks, months, years factor = None if time_a == 'sec' or time_a == "secs" or time_a == 'second' or time_a == "seconds": factor = 1 if time_a == "min" or time_a == "mins" or time_a == "minute" or time_a == "minutes": factor = 60 if time_a == "hr" or time_a == "hrs" or time_a == "hour" or time_a == "hours": factor = 60 * 60 if time_a == "day" or time_a == "days": factor = 60 * 60 * 24 if time_a == "wk" or time_a == "wks" or time_a == "week" or time_a == "weeks": factor = 60 * 60 * 24 * 7 if time_a == "mon" or time_a == "mons" or time_a == "month" or time_a == "months": factor = 60 * 60 * 24 * 30 if time_a == "yr" or time_a == "yrs" or time_a == "year" or time_a == "years": factor = 60 * 60 * 24 * 365 time_period_secs = float(time_n) * factor return time_period_secs def on_quit(self): #Cancel the timer if hasattr(self, 'interval_timer_id'): self.ssclient.cancel_timer(self.interval_timer_id) super(VizTransformMatplotlibGraphs,self).on_quit()
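# --- Illustrative configuration sketch for VizTransformMatplotlibGraphs (not part of the original source) ---
# A minimal sketch of the CFG keys read by on_start() and interval_timer_callback() above.
# The key names (process.publish_streams, graph_gen_interval, graph_time_period,
# parameters, resolution, in_dp_id) come from the code; every concrete value is an
# assumption for illustration only.
example_mpl_transform_config = {
    'process': {
        'publish_streams': {'graph_image_stream': 'stream_id_placeholder'},
    },
    'graph_gen_interval': '6hrs',        # when set, the transform runs off an interval timer instead of packets
    'graph_time_period': '24hrs',        # window of data retrieved for each generated graph
    'parameters': 'temp, conductivity',  # comma-separated list; 'time' is appended automatically
    'resolution': '3600',                # optional hint passed to the graphing algorithm, not to retrieval
    'in_dp_id': 'data_product_id_placeholder',
}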
class QCPostProcessing(SimpleProcess): ''' QC Post Processing Process This process provides ION clients and operators the capability to evaluate the automated quality control flags on various data products. This process should be run periodically with overlapping spans of data to ensure complete dataset QC verification. The parameters that this process accepts as configuration are: - dataset_id: The dataset identifier, required. - start_time: Unix timestamp, defaults to 24 hours in the past - end_time: Unix timestamp, defaults to the current time - qc_params: a list of qc functions to evaluate, currently supported functions are: ['glblrng_qc', 'spketst_qc', 'stuckvl_qc'], defaults to all ''' qc_suffixes = ['glblrng_qc', 'spketst_qc', 'stuckvl_qc'] def on_start(self): SimpleProcess.on_start(self) self.data_retriever = DataRetrieverServiceProcessClient(process=self) self.interval_key = self.CFG.get_safe('process.interval_key', None) self.qc_params = self.CFG.get_safe('process.qc_params', []) validate_is_not_none( self.interval_key, 'An interval key is necessary to launch this process') self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True) self.add_endpoint(self.event_subscriber) self.resource_registry = self.container.resource_registry self.run_interval = self.CFG.get_safe( 'service.qc_processing.run_interval', 24) def _event_callback(self, *args, **kwargs): log.info('QC Post Processing Triggered') dataset_ids, _ = self.resource_registry.find_resources( restype=RT.Dataset, id_only=True) for dataset_id in dataset_ids: log.info('QC Post Processing for dataset %s', dataset_id) try: self.process(dataset_id) except BadRequest as e: if 'Problems reading from the coverage' in e.message: log.error('Failed to read from dataset') def process(self, dataset_id, start_time=0, end_time=0): if not dataset_id: raise BadRequest('No dataset id specified.') now = time.time() start_time = start_time or (now - (3600 * (self.run_interval + 1)) ) # Every N hours with 1 hour of overlap end_time = end_time or now qc_params = [i for i in self.qc_params if i in self.qc_suffixes ] or self.qc_suffixes self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent) log.debug('Iterating over the data blocks') for st, et in self.chop(int(start_time), int(end_time)): log.debug('Chopping %s:%s', st, et) log.debug( "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s})", dataset_id, st, et) granule = self.data_retriever.retrieve(dataset_id, query={ 'start_time': st, 'end_time': et }) log.debug('Retrieved Data') rdt = RecordDictionaryTool.load_from_granule(granule) qc_fields = [ i for i in rdt.fields if any([i.endswith(j) for j in qc_params]) ] log.debug('QC Fields: %s', qc_fields) for field in qc_fields: val = rdt[field] if val is None: continue if not np.all(val): log.debug('Found QC Alerts') indexes = np.where(val == 0) timestamps = rdt[rdt.temporal_parameter][indexes[0]] self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {}) def flag_qc_parameter(self, dataset_id, parameter, temporal_values, configuration): log.info('Flagging QC for %s', parameter) data_product_ids, _ = self.resource_registry.find_subjects( object=dataset_id, subject_type=RT.DataProduct, predicate=PRED.hasDataset, id_only=True) for data_product_id in data_product_ids: self.qc_publisher.publish_event(origin=data_product_id, qc_parameter=parameter, temporal_values=temporal_values, configuration=configuration) @classmethod def
chop(cls, start_time, end_time): while start_time < end_time: yield (start_time, min(start_time + 3600, end_time)) start_time = min(start_time + 3600, end_time) return
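# --- Illustrative note on QCPostProcessing.chop (standalone sketch, not part of the service code) ---
# chop() walks [start_time, end_time) in 3600-second blocks, clipping the final block
# to end_time. The generator below re-states the same logic purely to document the
# expected output of the classmethod shown above.
def _chop_sketch(start_time, end_time, block=3600):
    while start_time < end_time:
        yield (start_time, min(start_time + block, end_time))
        start_time = min(start_time + block, end_time)

# list(_chop_sketch(0, 7800)) == [(0, 3600), (3600, 7200), (7200, 7800)]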
class VizTransformMatplotlibGraphs(TransformStreamPublisher, TransformEventListener, TransformStreamListener): """ This class is used for instantiating worker processes that have subscriptions to data streams and convert incoming data from CDM format to Matplotlib graphs """ output_bindings = ['graph_image_param_dict'] event_timer_interval = None def on_start(self): #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG self.pubsub_management = PubsubManagementServiceProcessClient( process=self) self.ssclient = SchedulerServiceProcessClient(process=self) self.rrclient = ResourceRegistryServiceProcessClient(process=self) self.data_retriever_client = DataRetrieverServiceProcessClient( process=self) self.dsm_client = DatasetManagementServiceProcessClient(process=self) self.pubsub_client = PubsubManagementServiceProcessClient(process=self) self.stream_info = self.CFG.get_safe('process.publish_streams', {}) self.stream_names = self.stream_info.keys() self.stream_ids = self.stream_info.values() if not self.stream_names: raise BadRequest('MPL Transform has no output streams.') graph_time_periods = self.CFG.get_safe('graph_time_periods') # If this is meant to be an event driven process, schedule an event to be generated every few minutes/hours self.event_timer_interval = self.CFG.get_safe('graph_gen_interval') if self.event_timer_interval: event_origin = "Interval_Timer_Matplotlib" sub = EventSubscriber(event_type="ResourceEvent", callback=self.interval_timer_callback, origin=event_origin) sub.start() self.interval_timer_id = self.ssclient.create_interval_timer( start_time="now", interval=self._str_to_secs(self.event_timer_interval), event_origin=event_origin, event_subtype="") super(VizTransformMatplotlibGraphs, self).on_start() # when tranform is used as a data process def recv_packet(self, packet, in_stream_route, in_stream_id): #Check to see if the class instance was set up as a event triggered transform. If yes, skip the packet if self.event_timer_interval: return log.info('Received packet') mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute( packet, params=self.get_stream_definition()) for stream_name in self.stream_names: publisher = getattr(self, stream_name) publisher.publish(mpl_data_granule) def get_stream_definition(self): stream_id = self.stream_ids[0] stream_def = self.pubsub_management.read_stream_definition( stream_id=stream_id) return stream_def._id def process_event(self, msg, headers): return def interval_timer_callback(self, *args, **kwargs): #Find out the input data product to this process in_dp_id = self.CFG.get_safe('in_dp_id') print " >>>>>>>>>>>>>> IN DP ID from cfg : ", in_dp_id # get the dataset_id associated with the data_product. Need it to do the data retrieval ds_ids, _ = self.rrclient.find_objects(in_dp_id, PRED.hasDataset, RT.Dataset, True) if ds_ids is None or not ds_ids: return None # retrieve data for the specified time interval. 
Set up the query from the passed config first query = {} param_list_str = self.CFG.get_safe('parameters') if param_list_str: query['parameters'] = param_list_str.split(', ') # append time if not present in list of parameters if 'time' not in query['parameters']: query['parameters'].append('time') query['start_time'] = query['end_time'] = ntplib.system_to_ntp_time( time.time()) # Now query['stride_time'] = 1 if self.CFG.get_safe('graph_time_period'): query['start_time'] = query['end_time'] - self._str_to_secs( self.CFG.get_safe('graph_time_period')) #print " >>>>>>>>>>>>>> QUERY = ", query #retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0],{'start_time':start_time,'end_time':end_time}) retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0], query=query) # add extra parameters to query passed in config that are not needed by data retrieval if self.CFG.get_safe('resolution'): query['resolution'] = self.CFG.get_safe('resolution') # send the granule through the Algorithm code to get the matplotlib graphs mpl_pdict_id = self.dsm_client.read_parameter_dictionary_by_name( 'graph_image_param_dict', id_only=True) mpl_stream_def = self.pubsub_client.create_stream_definition( 'mpl', parameter_dictionary_id=mpl_pdict_id) fileName = self.CFG.get_safe('graph_time_period') mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute( retrieved_granule, config=query, params=mpl_stream_def, fileName=fileName) if mpl_data_granule is None: return None # publish on all specified output streams for stream_name in self.stream_names: publisher = getattr(self, stream_name) publisher.publish(mpl_data_granule) return def _str_to_secs(self, time_period): # this method converts commonly used time periods to their equivalent in seconds #separate alpha and numeric parts of the time period time_n = time_period.lower().rstrip('abcdefghijklmnopqrstuvwxyz ') time_a = time_period.lower().lstrip('0123456789. ') # determine if the user specified secs, mins, hours, days, weeks, months, years factor = None if time_a == 'sec' or time_a == "secs" or time_a == 'second' or time_a == "seconds": factor = 1 if time_a == "min" or time_a == "mins" or time_a == "minute" or time_a == "minutes": factor = 60 if time_a == "hr" or time_a == "hrs" or time_a == "hour" or time_a == "hours": factor = 60 * 60 if time_a == "day" or time_a == "days": factor = 60 * 60 * 24 if time_a == "wk" or time_a == "wks" or time_a == "week" or time_a == "weeks": factor = 60 * 60 * 24 * 7 if time_a == "mon" or time_a == "mons" or time_a == "month" or time_a == "months": factor = 60 * 60 * 24 * 30 if time_a == "yr" or time_a == "yrs" or time_a == "year" or time_a == "years": factor = 60 * 60 * 24 * 365 time_period_secs = float(time_n) * factor return time_period_secs def on_quit(self): #Cancel the timer if hasattr(self, 'interval_timer_id'): self.ssclient.cancel_timer(self.interval_timer_id) super(VizTransformMatplotlibGraphs, self).on_quit()
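# --- Illustrative retrieval query sketch (not part of the original source) ---
# interval_timer_callback() above builds a query dict for DataRetrieverServiceProcessClient.retrieve.
# The field names mirror the code; the parameter names and the six-hour window are
# placeholders chosen for illustration. 'resolution' is only added after retrieval,
# for the graphing algorithm.
import ntplib
import time

end_time = ntplib.system_to_ntp_time(time.time())
example_retrieve_query = {
    'parameters': ['temp', 'conductivity', 'time'],  # 'time' is always included
    'start_time': end_time - 6 * 60 * 60,            # e.g. a graph_time_period of '6hrs'
    'end_time': end_time,
    'stride_time': 1,
}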
class TestWorkflowManagementIntegration(VisualizationIntegrationTestHelper): def setUp(self): # Start container self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # simulate preloading preload_ion_params(self.container) #Instantiate a process to represent the test process = WorkflowServiceTestProcess() # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceProcessClient( node=self.container.node, process=process) self.damsclient = DataAcquisitionManagementServiceProcessClient( node=self.container.node, process=process) self.pubsubclient = PubsubManagementServiceProcessClient( node=self.container.node, process=process) self.ingestclient = IngestionManagementServiceProcessClient( node=self.container.node, process=process) self.imsclient = InstrumentManagementServiceProcessClient( node=self.container.node, process=process) self.dataproductclient = DataProductManagementServiceProcessClient( node=self.container.node, process=process) self.dataprocessclient = DataProcessManagementServiceProcessClient( node=self.container.node, process=process) self.datasetclient = DatasetManagementServiceProcessClient( node=self.container.node, process=process) self.workflowclient = WorkflowManagementServiceProcessClient( node=self.container.node, process=process) self.process_dispatcher = ProcessDispatcherServiceProcessClient( node=self.container.node, process=process) self.data_retriever = DataRetrieverServiceProcessClient( node=self.container.node, process=process) self.ctd_stream_def = SBE37_CDM_stream_definition() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI') def test_SA_transform_components(self): assertions = self.assertTrue #The list of data product streams to monitor data_product_stream_ids = list() #Create the input data product ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product( ) data_product_stream_ids.append(ctd_stream_id) ### ### Setup the first transformation ### # Salinity: Data Process Definition ctd_L2_salinity_dprocdef_id = self.create_salinity_data_process_definition( ) l2_salinity_all_data_process_id, ctd_l2_salinity_output_dp_id = self.create_transform_process( ctd_L2_salinity_dprocdef_id, ctd_parsed_data_product_id, 'salinity') ## get the stream id for the transform outputs stream_ids, _ = self.rrclient.find_objects( ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) > 0) sal_stream_id = stream_ids[0] data_product_stream_ids.append(sal_stream_id) ### ### Setup the second transformation ### # Salinity Doubler: Data Process Definition salinity_doubler_dprocdef_id = self.create_salinity_doubler_data_process_definition( ) salinity_double_data_process_id, salinity_doubler_output_dp_id = self.create_transform_process( salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id, 'salinity') stream_ids, _ = self.rrclient.find_objects( salinity_doubler_output_dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) > 0) sal_dbl_stream_id = stream_ids[0] data_product_stream_ids.append(sal_dbl_stream_id) #Start the output stream listener to monitor and collect messages results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids) #Stop the transform processes self.dataprocessclient.deactivate_data_process( salinity_double_data_process_id) self.dataprocessclient.deactivate_data_process( l2_salinity_all_data_process_id) #Validate the data from each of the messages along the way 
self.validate_messages(results) @attr('LOCOINT') @attr('SMOKE') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI') def test_transform_workflow(self): assertions = self.assertTrue log.debug("Building the workflow definition") workflow_def_obj = IonObject( RT.WorkflowDefinition, name='Salinity_Test_Workflow', description='tests a workflow of multiple transform data processes' ) workflow_data_product_name = 'TEST-Workflow_Output_Product' #Set a specific output product name #------------------------------------------------------------------------------------------------------------------------- log.debug("Adding a transformation process definition for salinity") #------------------------------------------------------------------------------------------------------------------------- ctd_L2_salinity_dprocdef_id = self.create_salinity_data_process_definition( ) workflow_step_obj = IonObject( 'DataProcessWorkflowStep', data_process_definition_id=ctd_L2_salinity_dprocdef_id, persist_process_output_data=False ) #Don't persist the intermediate data product workflow_def_obj.workflow_steps.append(workflow_step_obj) #------------------------------------------------------------------------------------------------------------------------- log.debug( "Adding a transformation process definition for salinity doubler") #------------------------------------------------------------------------------------------------------------------------- salinity_doubler_dprocdef_id = self.create_salinity_doubler_data_process_definition( ) workflow_step_obj = IonObject( 'DataProcessWorkflowStep', data_process_definition_id=salinity_doubler_dprocdef_id, ) workflow_def_obj.workflow_steps.append(workflow_step_obj) log.debug("Creating workflow def in the resource registry") workflow_def_id = self.workflowclient.create_workflow_definition( workflow_def_obj) aids = self.rrclient.find_associations(workflow_def_id, PRED.hasDataProcessDefinition) assertions(len(aids) == 2) #The list of data product streams to monitor data_product_stream_ids = list() log.debug("Creating the input data product") ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product( ) data_product_stream_ids.append(ctd_stream_id) log.debug("Creating and starting the workflow") workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow( workflow_def_id, ctd_parsed_data_product_id, persist_workflow_data_product=True, output_data_product_name=workflow_data_product_name, timeout=300) workflow_output_ids, _ = self.rrclient.find_subjects( RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True) assertions(len(workflow_output_ids) == 1) log.debug("persisting the output product") #self.dataproductclient.activate_data_product_persistence(workflow_product_id) dataset_ids, _ = self.rrclient.find_objects(workflow_product_id, PRED.hasDataset, RT.Dataset, True) assertions(len(dataset_ids) == 1) dataset_id = dataset_ids[0] log.debug( "Verifying the output data product name matches what was specified in the workflow definition" ) workflow_product = self.rrclient.read(workflow_product_id) assertions( workflow_product.name.startswith(workflow_data_product_name), 'Nope: %s != %s' % (workflow_product.name, workflow_data_product_name)) log.debug( "Walking the associations to find the appropriate output data streams to validate the messages" ) workflow_dp_ids, _ = self.rrclient.find_objects( workflow_id, PRED.hasDataProduct, RT.DataProduct, True) assertions(len(workflow_dp_ids) == 2) for dp_id 
in workflow_dp_ids: stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) == 1) data_product_stream_ids.append(stream_ids[0]) log.debug("data_product_stream_ids: %s" % data_product_stream_ids) log.debug( "Starting the output stream listener to monitor to collect messages" ) results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids) log.debug("results::: %s" % results) log.debug("Stopping the workflow processes") self.workflowclient.terminate_data_process_workflow( workflow_id, False, timeout=250) # Should test true at some point log.debug("Making sure the Workflow object was removed") objs, _ = self.rrclient.find_resources(restype=RT.Workflow) assertions(len(objs) == 0) log.debug( "Validating the data from each of the messages along the way") self.validate_messages(results) log.debug( "Checking to see if dataset id = %s, was persisted, and that it can be retrieved...." % dataset_id) self.validate_data_ingest_retrieve(dataset_id) log.debug("Cleaning up to make sure delete is correct.") self.workflowclient.delete_workflow_definition(workflow_def_id) """ workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition) assertions(len(workflow_def_ids) == 0 ) """ @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI') def test_highcharts_transform_workflow(self): assertions = self.assertTrue # Build the workflow definition workflow_def_obj = IonObject( RT.WorkflowDefinition, name='HighCharts_Test_Workflow', description= 'Tests the workflow of converting stream data to HighCharts') #Add a transformation process definition highcharts_procdef_id = self.create_highcharts_data_process_definition( ) workflow_step_obj = IonObject( 'DataProcessWorkflowStep', data_process_definition_id=highcharts_procdef_id, persist_process_output_data=False) workflow_def_obj.workflow_steps.append(workflow_step_obj) #Create it in the resource registry workflow_def_id = self.workflowclient.create_workflow_definition( workflow_def_obj) #The list of data product streams to monitor data_product_stream_ids = list() #Create the input data product ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product( ) data_product_stream_ids.append(ctd_stream_id) #Create and start the workflow workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow( workflow_def_id, ctd_parsed_data_product_id, timeout=60) workflow_output_ids, _ = self.rrclient.find_subjects( RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True) assertions(len(workflow_output_ids) == 1) #Walk the associations to find the appropriate output data streams to validate the messages workflow_dp_ids, _ = self.rrclient.find_objects( workflow_id, PRED.hasDataProduct, RT.DataProduct, True) assertions(len(workflow_dp_ids) == 1) for dp_id in workflow_dp_ids: stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) == 1) data_product_stream_ids.append(stream_ids[0]) #Start the output stream listener to monitor and collect messages results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids) #Stop the workflow processes self.workflowclient.terminate_data_process_workflow( workflow_id=workflow_id, delete_data_products=False, timeout=60) # Should test true at some point #Validate the data from each of the messages along the way self.validate_highcharts_transform_results(results) #Cleanup to make sure delete is 
correct. self.workflowclient.delete_workflow_definition(workflow_def_id) """ workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition) assertions(len(workflow_def_ids) == 0 ) """ @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI') def test_mpl_graphs_transform_workflow(self): assertions = self.assertTrue # Build the workflow definition workflow_def_obj = IonObject( RT.WorkflowDefinition, name='Mpl_Graphs_Test_Workflow', description= 'Tests the workflow of converting stream data to Matplotlib graphs' ) #Add a transformation process definition mpl_graphs_procdef_id = self.create_mpl_graphs_data_process_definition( ) workflow_step_obj = IonObject( 'DataProcessWorkflowStep', data_process_definition_id=mpl_graphs_procdef_id, persist_process_output_data=False) workflow_def_obj.workflow_steps.append(workflow_step_obj) #Create it in the resource registry workflow_def_id = self.workflowclient.create_workflow_definition( workflow_def_obj) #The list of data product streams to monitor data_product_stream_ids = list() #Create the input data product ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product( ) data_product_stream_ids.append(ctd_stream_id) #Create and start the workflow workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow( workflow_def_id, ctd_parsed_data_product_id, persist_workflow_data_product=True, timeout=60) workflow_output_ids, _ = self.rrclient.find_subjects( RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True) assertions(len(workflow_output_ids) == 1) #Walk the associations to find the appropriate output data streams to validate the messages workflow_dp_ids, _ = self.rrclient.find_objects( workflow_id, PRED.hasDataProduct, RT.DataProduct, True) assertions(len(workflow_dp_ids) == 1) for dp_id in workflow_dp_ids: stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True) assertions(len(stream_ids) == 1) data_product_stream_ids.append(stream_ids[0]) #Start the output stream listener to monitor and collect messages results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids) #Stop the workflow processes self.workflowclient.terminate_data_process_workflow( workflow_id=workflow_id, delete_data_products=False, timeout=60) # Should test true at some point #Validate the data from each of the messages along the way self.validate_mpl_graphs_transform_results(results) # Check to see if ingestion worked. Extract the granules from data_retrieval. # First find the dataset associated with the output dp product ds_ids, _ = self.rrclient.find_objects( workflow_dp_ids[len(workflow_dp_ids) - 1], PRED.hasDataset, RT.Dataset, True) retrieved_granule = self.data_retriever.retrieve_last_data_points( ds_ids[0], 10) #Validate the data from each of the messages along the way self.validate_mpl_graphs_transform_results(retrieved_granule) #Cleanup to make sure delete is correct. 
self.workflowclient.delete_workflow_definition(workflow_def_id) """ workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition) assertions(len(workflow_def_ids) == 0 ) """ @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI') def test_multiple_workflow_instances(self): assertions = self.assertTrue # Build the workflow definition workflow_def_obj = IonObject( RT.WorkflowDefinition, name='Multiple_Test_Workflow', description='Tests the workflow of converting stream data') #Add a transformation process definition highcharts_procdef_id = self.create_highcharts_data_process_definition( ) workflow_step_obj = IonObject( 'DataProcessWorkflowStep', data_process_definition_id=highcharts_procdef_id, persist_process_output_data=False) workflow_def_obj.workflow_steps.append(workflow_step_obj) #Create it in the resource registry workflow_def_id = self.workflowclient.create_workflow_definition( workflow_def_obj) #The list of data product streams to monitor data_product_stream_ids = list() #Create the first input data product ctd_stream_id1, ctd_parsed_data_product_id1 = self.create_ctd_input_stream_and_data_product( 'ctd_parsed1') data_product_stream_ids.append(ctd_stream_id1) #Create and start the first workflow workflow_id1, workflow_product_id1 = self.workflowclient.create_data_process_workflow( workflow_def_id, ctd_parsed_data_product_id1, timeout=60) #Create the second input data product ctd_stream_id2, ctd_parsed_data_product_id2 = self.create_ctd_input_stream_and_data_product( 'ctd_parsed2') data_product_stream_ids.append(ctd_stream_id2) #Create and start the second workflow workflow_id2, workflow_product_id2 = self.workflowclient.create_data_process_workflow( workflow_def_id, ctd_parsed_data_product_id2, timeout=60) #Walk the associations to find the appropriate output data streams to validate the messages workflow_ids, _ = self.rrclient.find_resources(restype=RT.Workflow) assertions(len(workflow_ids) == 2) #Start the first input stream process ctd_sim_pid1 = self.start_sinusoidal_input_stream_process( ctd_stream_id1) #Start the second input stream process ctd_sim_pid2 = self.start_simple_input_stream_process(ctd_stream_id2) #Start the output stream listener to monitor a set number of messages being sent through the workflows results = self.start_output_stream_and_listen( None, data_product_stream_ids, message_count_per_stream=5) # stop the flow of messages... self.process_dispatcher.cancel_process( ctd_sim_pid1 ) # kill the ctd simulator process - that is enough data self.process_dispatcher.cancel_process(ctd_sim_pid2) #Stop the first workflow processes self.workflowclient.terminate_data_process_workflow( workflow_id=workflow_id1, delete_data_products=False, timeout=60) # Should test true at some point #Stop the second workflow processes self.workflowclient.terminate_data_process_workflow( workflow_id=workflow_id2, delete_data_products=False, timeout=60) # Should test true at some point workflow_ids, _ = self.rrclient.find_resources(restype=RT.Workflow) assertions(len(workflow_ids) == 0) #Cleanup to make sure delete is correct. self.workflowclient.delete_workflow_definition(workflow_def_id) """