Example #1
    def on_start(self):
        #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG

        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)
        self.ssclient = SchedulerServiceProcessClient(process=self)
        self.rrclient = ResourceRegistryServiceProcessClient(process=self)
        self.data_retriever_client = DataRetrieverServiceProcessClient(process=self)
        self.dsm_client = DatasetManagementServiceProcessClient(process=self)
        self.pubsub_client = PubsubManagementServiceProcessClient(process=self)

        self.stream_info  = self.CFG.get_safe('process.publish_streams',{})
        self.stream_names = self.stream_info.keys()
        self.stream_ids   = self.stream_info.values()

        if not self.stream_names:
            raise BadRequest('MPL Transform has no output streams.')

        graph_time_periods = self.CFG.get_safe('graph_time_periods')

        # If this is meant to be an event-driven process, schedule an event to be generated every few minutes/hours
        self.event_timer_interval = self.CFG.get_safe('graph_gen_interval')
        if self.event_timer_interval:
            event_origin = "Interval_Timer_Matplotlib"
            sub = EventSubscriber(event_type="ResourceEvent", callback=self.interval_timer_callback, origin=event_origin)
            sub.start()

            self.interval_timer_id = self.ssclient.create_interval_timer(start_time="now", interval=self._str_to_secs(self.event_timer_interval),
                event_origin=event_origin, event_subtype="")

        super(VizTransformMatplotlibGraphs,self).on_start()
Example #2
    def setUp(self):
        # Start container

        logging.disable(logging.ERROR)
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # simulate preloading
        preload_ion_params(self.container)
        logging.disable(logging.NOTSET)

        #Instantiate a process to represent the test
        process=VisualizationServiceTestProcess()

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceProcessClient(node=self.container.node, process=process)
        self.damsclient = DataAcquisitionManagementServiceProcessClient(node=self.container.node, process=process)
        self.pubsubclient =  PubsubManagementServiceProcessClient(node=self.container.node, process=process)
        self.ingestclient = IngestionManagementServiceProcessClient(node=self.container.node, process=process)
        self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process=process)
        self.dataproductclient = DataProductManagementServiceProcessClient(node=self.container.node, process=process)
        self.dataprocessclient = DataProcessManagementServiceProcessClient(node=self.container.node, process=process)
        self.datasetclient =  DatasetManagementServiceProcessClient(node=self.container.node, process=process)
        self.workflowclient = WorkflowManagementServiceProcessClient(node=self.container.node, process=process)
        self.process_dispatcher = ProcessDispatcherServiceProcessClient(node=self.container.node, process=process)
        self.data_retriever = DataRetrieverServiceProcessClient(node=self.container.node, process=process)
        self.vis_client = VisualizationServiceProcessClient(node=self.container.node, process=process)

        self.ctd_stream_def = SBE37_CDM_stream_definition()
Example #3
    def setUp(self):
        # Start container

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # simulate preloading
        preload_ion_params(self.container)

        #Instantiate a process to represent the test
        process=WorkflowServiceTestProcess()

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceProcessClient(node=self.container.node, process=process)
        self.damsclient = DataAcquisitionManagementServiceProcessClient(node=self.container.node, process=process)
        self.pubsubclient =  PubsubManagementServiceProcessClient(node=self.container.node, process=process)
        self.ingestclient = IngestionManagementServiceProcessClient(node=self.container.node, process=process)
        self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process=process)
        self.dataproductclient = DataProductManagementServiceProcessClient(node=self.container.node, process=process)
        self.dataprocessclient = DataProcessManagementServiceProcessClient(node=self.container.node, process=process)
        self.datasetclient =  DatasetManagementServiceProcessClient(node=self.container.node, process=process)
        self.workflowclient = WorkflowManagementServiceProcessClient(node=self.container.node, process=process)
        self.process_dispatcher = ProcessDispatcherServiceProcessClient(node=self.container.node, process=process)
        self.data_retriever = DataRetrieverServiceProcessClient(node=self.container.node, process=process)

        self.ctd_stream_def = SBE37_CDM_stream_definition()
Example #4
    def on_start(self):
        SimpleProcess.on_start(self)
        self.data_retriever = DataRetrieverServiceProcessClient(process=self)
        self.interval_key = self.CFG.get_safe('process.interval_key', None)
        self.qc_params = self.CFG.get_safe('process.qc_params', [])
        validate_is_not_none(
            self.interval_key,
            'An interval key is necessary to launch this process')
        self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent,
                                                origin=self.interval_key,
                                                callback=self._event_callback,
                                                auto_delete=True)
        self.add_endpoint(self.event_subscriber)
        self.resource_registry = self.container.resource_registry
        self.run_interval = self.CFG.get_safe(
            'service.qc_processing.run_interval', 24)
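The subscription above fires only when something publishes a TimerEvent whose origin matches process.interval_key. A minimal sketch of wiring that up through the scheduler service, mirroring the create_interval_timer call in Example #1 (the origin string and the daily interval are assumptions):

    # Hypothetical: ask the scheduler to publish a timer event once a day with
    # an origin equal to this process's 'process.interval_key' value.
    ssclient = SchedulerServiceProcessClient(process=self)
    timer_id = ssclient.create_interval_timer(
        start_time="now",
        interval=3600 * 24,
        event_origin="qc_post_processing_interval",  # must match interval_key
        event_subtype="")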
Example #5
    def on_start(self):
        SimpleProcess.on_start(self)
        self.data_retriever = DataRetrieverServiceProcessClient(process=self)
        self.interval_key = self.CFG.get_safe('process.interval_key', None)
        self.qc_params    = self.CFG.get_safe('process.qc_params', [])
        validate_is_not_none(self.interval_key, 'An interval key is necessary to launch this process')
        self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True)
        self.add_endpoint(self.event_subscriber)
        self.resource_registry = self.container.resource_registry
Example #6
    def on_start(self):
        #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG

        self.pubsub_management = PubsubManagementServiceProcessClient(
            process=self)
        self.ssclient = SchedulerServiceProcessClient(process=self)
        self.rrclient = ResourceRegistryServiceProcessClient(process=self)
        self.data_retriever_client = DataRetrieverServiceProcessClient(
            process=self)
        self.dsm_client = DatasetManagementServiceProcessClient(process=self)
        self.pubsub_client = PubsubManagementServiceProcessClient(process=self)

        self.stream_info = self.CFG.get_safe('process.publish_streams', {})
        self.stream_names = self.stream_info.keys()
        self.stream_ids = self.stream_info.values()

        if not self.stream_names:
            raise BadRequest('MPL Transform has no output streams.')

        graph_time_periods = self.CFG.get_safe('graph_time_periods')

        # If this is meant to be an event-driven process, schedule an event to be generated every few minutes/hours
        self.event_timer_interval = self.CFG.get_safe('graph_gen_interval')
        if self.event_timer_interval:
            event_origin = "Interval_Timer_Matplotlib"
            sub = EventSubscriber(event_type="ResourceEvent",
                                  callback=self.interval_timer_callback,
                                  origin=event_origin)
            sub.start()

            self.interval_timer_id = self.ssclient.create_interval_timer(
                start_time="now",
                interval=self._str_to_secs(self.event_timer_interval),
                event_origin=event_origin,
                event_subtype="")

        super(VizTransformMatplotlibGraphs, self).on_start()
Example #7
class TestWorkflowManagementIntegration(VisualizationIntegrationTestHelper):

    def setUp(self):
        # Start container

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        #Instantiate a process to represent the test
        process=WorkflowServiceTestProcess()

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceProcessClient(node=self.container.node, process=process)
        self.damsclient = DataAcquisitionManagementServiceProcessClient(node=self.container.node, process=process)
        self.pubsubclient =  PubsubManagementServiceProcessClient(node=self.container.node, process=process)
        self.ingestclient = IngestionManagementServiceProcessClient(node=self.container.node, process=process)
        self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process=process)
        self.dataproductclient = DataProductManagementServiceProcessClient(node=self.container.node, process=process)
        self.dataprocessclient = DataProcessManagementServiceProcessClient(node=self.container.node, process=process)
        self.datasetclient =  DatasetManagementServiceProcessClient(node=self.container.node, process=process)
        self.workflowclient = WorkflowManagementServiceProcessClient(node=self.container.node, process=process)
        self.process_dispatcher = ProcessDispatcherServiceProcessClient(node=self.container.node, process=process)
        self.data_retriever = DataRetrieverServiceProcessClient(node=self.container.node, process=process)

        self.ctd_stream_def = SBE37_CDM_stream_definition()



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI')
    def test_SA_transform_components(self):

        assertions = self.assertTrue

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)


        ###
        ###  Setup the first transformation
        ###

        # Salinity: Data Process Definition
        ctd_L2_salinity_dprocdef_id = self.create_salinity_data_process_definition()

        l2_salinity_all_data_process_id, ctd_l2_salinity_output_dp_id = self.create_transform_process(ctd_L2_salinity_dprocdef_id,ctd_parsed_data_product_id, 'salinity' )

        ## get the stream id for the transform outputs
        stream_ids, _ = self.rrclient.find_objects(ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0 )
        sal_stream_id = stream_ids[0]
        data_product_stream_ids.append(sal_stream_id)


        ###
        ###  Setup the second transformation
        ###

        # Salinity Doubler: Data Process Definition
        salinity_doubler_dprocdef_id = self.create_salinity_doubler_data_process_definition()

        salinity_double_data_process_id, salinity_doubler_output_dp_id = self.create_transform_process(salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id, 'salinity' )

        stream_ids, _ = self.rrclient.find_objects(salinity_doubler_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0 )
        sal_dbl_stream_id = stream_ids[0]
        data_product_stream_ids.append(sal_dbl_stream_id)


        #Start the output stream listener to monitor and collect messages
        results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids)


        #Stop the transform processes
        self.dataprocessclient.deactivate_data_process(salinity_double_data_process_id)
        self.dataprocessclient.deactivate_data_process(l2_salinity_all_data_process_id)

        #Validate the data from each of the messages along the way
        self.validate_messages(results)


    @attr('LOCOINT')
    @attr('SMOKE')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI')
    def test_transform_workflow(self):

        assertions = self.assertTrue

        log.debug("Building the workflow definition")

        workflow_def_obj = IonObject(RT.WorkflowDefinition,
                                     name='Salinity_Test_Workflow',
                                     description='tests a workflow of multiple transform data processes')

        workflow_data_product_name = 'TEST-Workflow_Output_Product' #Set a specific output product name

        #-------------------------------------------------------------------------------------------------------------------------
        log.debug( "Adding a transformation process definition for salinity")
        #-------------------------------------------------------------------------------------------------------------------------

        ctd_L2_salinity_dprocdef_id = self.create_salinity_data_process_definition()
        workflow_step_obj = IonObject('DataProcessWorkflowStep',
                                      data_process_definition_id=ctd_L2_salinity_dprocdef_id,
                                      persist_process_output_data=False)  #Don't persist the intermediate data product
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #-------------------------------------------------------------------------------------------------------------------------
        log.debug( "Adding a transformation process definition for salinity doubler")
        #-------------------------------------------------------------------------------------------------------------------------

        salinity_doubler_dprocdef_id = self.create_salinity_doubler_data_process_definition()
        workflow_step_obj = IonObject('DataProcessWorkflowStep',
                                      data_process_definition_id=salinity_doubler_dprocdef_id, )
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        log.debug( "Creating workflow def in the resource registry")
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj)

        aids = self.rrclient.find_associations(workflow_def_id, PRED.hasDataProcessDefinition)
        assertions(len(aids) == 2 )

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        log.debug( "Creating the input data product")
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        log.debug( "Creating and starting the workflow")
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(workflow_def_id,
                                                                                            ctd_parsed_data_product_id,
                persist_workflow_data_product=True, output_data_product_name=workflow_data_product_name, timeout=300)

        workflow_output_ids,_ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1 )

        log.debug( "persisting the output product")
        #self.dataproductclient.activate_data_product_persistence(workflow_product_id)
        dataset_ids,_ = self.rrclient.find_objects(workflow_product_id, PRED.hasDataset, RT.Dataset, True)
        assertions(len(dataset_ids) == 1 )
        dataset_id = dataset_ids[0]

        log.debug( "Verifying the output data product name matches what was specified in the workflow definition")
        workflow_product = self.rrclient.read(workflow_product_id)
        assertions(workflow_product.name.startswith(workflow_data_product_name), 'Nope: %s != %s' % (workflow_product.name, workflow_data_product_name))

        log.debug( "Walking the associations to find the appropriate output data streams to validate the messages")

        workflow_dp_ids,_ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 2 )

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True)
            assertions(len(stream_ids) == 1 )
            data_product_stream_ids.append(stream_ids[0])

        log.debug( "data_product_stream_ids: %s" % data_product_stream_ids)

        log.debug( "Starting the output stream listener to monitor to collect messages")
        results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids)

        log.debug( "results::: %s" % results)

        log.debug( "Stopping the workflow processes")
        self.workflowclient.terminate_data_process_workflow(workflow_id, False, timeout=250)  # Should test true at some point

        log.debug( "Making sure the Workflow object was removed")
        objs, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(objs) == 0)

        log.debug( "Validating the data from each of the messages along the way")
        self.validate_messages(results)

        log.debug( "Checking to see if dataset id = %s, was persisted, and that it can be retrieved...." % dataset_id)
        self.validate_data_ingest_retrieve(dataset_id)

        log.debug( "Cleaning up to make sure delete is correct.")
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0 )



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI')
    def test_google_dt_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(RT.WorkflowDefinition, name='GoogleDT_Test_Workflow',description='Tests the workflow of converting stream data to Google DT')

        #Add a transformation process definition
        google_dt_procdef_id = self.create_google_dt_data_process_definition()
        workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=google_dt_procdef_id, persist_process_output_data=False)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj)

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        #Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id, timeout=60)

        workflow_output_ids,_ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1 )

        #Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids,_ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 1 )

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True)
            assertions(len(stream_ids) == 1 )
            data_product_stream_ids.append(stream_ids[0])

        #Start the output stream listener to monitor and collect messages
        results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids)

        #Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(workflow_id=workflow_id,delete_data_products=False, timeout=60)  # Should test true at some point

        #Validate the data from each of the messages along the way
        self.validate_google_dt_transform_results(results)

        """
        # Check to see if ingestion worked. Extract the granules from data_retrieval.
        # First find the dataset associated with the output dp product
        ds_ids,_ = self.rrclient.find_objects(workflow_dp_ids[len(workflow_dp_ids) - 1], PRED.hasDataset, RT.Dataset, True)
        retrieved_granule = self.data_retriever.retrieve(ds_ids[0])

        #Validate the data from each of the messages along the way
        self.validate_google_dt_transform_results(retrieved_granule)
        """

        #Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0 )



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI')
    def test_mpl_graphs_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(RT.WorkflowDefinition, name='Mpl_Graphs_Test_Workflow',description='Tests the workflow of converting stream data to Matplotlib graphs')

        #Add a transformation process definition
        mpl_graphs_procdef_id = self.create_mpl_graphs_data_process_definition()
        workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=mpl_graphs_procdef_id, persist_process_output_data=False)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj)

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        #Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id,
            persist_workflow_data_product=True, timeout=60)

        workflow_output_ids,_ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1 )

        #Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids,_ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 1 )

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, None, True)
            assertions(len(stream_ids) == 1 )
            data_product_stream_ids.append(stream_ids[0])

        #Start the output stream listener to monitor and collect messages
        results = self.start_output_stream_and_listen(ctd_stream_id, data_product_stream_ids)

        #Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(workflow_id=workflow_id,delete_data_products=False, timeout=60)  # Should test true at some point

        #Validate the data from each of the messages along the way
        self.validate_mpl_graphs_transform_results(results)

        # Check to see if ingestion worked. Extract the granules from data_retrieval.
        # First find the dataset associated with the output dp product
        ds_ids,_ = self.rrclient.find_objects(workflow_dp_ids[len(workflow_dp_ids) - 1], PRED.hasDataset, RT.Dataset, True)
        retrieved_granule = self.data_retriever.retrieve_last_data_points(ds_ids[0], 10)

        #Validate the data from each of the messages along the way
        self.validate_mpl_graphs_transform_results(retrieved_granule)

        #Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0 )


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI')
    def test_multiple_workflow_instances(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(RT.WorkflowDefinition, name='Multiple_Test_Workflow',description='Tests the workflow of converting stream data')

        #Add a transformation process definition
        google_dt_procdef_id = self.create_google_dt_data_process_definition()
        workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=google_dt_procdef_id, persist_process_output_data=False)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj)

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the first input data product
        ctd_stream_id1, ctd_parsed_data_product_id1 = self.create_ctd_input_stream_and_data_product('ctd_parsed1')
        data_product_stream_ids.append(ctd_stream_id1)

        #Create and start the first workflow
        workflow_id1, workflow_product_id1 = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id1, timeout=60)

        #Create the second input data product
        ctd_stream_id2, ctd_parsed_data_product_id2 = self.create_ctd_input_stream_and_data_product('ctd_parsed2')
        data_product_stream_ids.append(ctd_stream_id2)

        #Create and start the second workflow
        workflow_id2, workflow_product_id2 = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id2, timeout=60)

        #Walk the associations to find the appropriate output data streams to validate the messages
        workflow_ids,_ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(workflow_ids) == 2 )


        #Start the first input stream process
        ctd_sim_pid1 = self.start_sinusoidal_input_stream_process(ctd_stream_id1)

        #Start the second input stream process
        ctd_sim_pid2 = self.start_simple_input_stream_process(ctd_stream_id2)

        #Start the output stream listener to monitor a set number of messages being sent through the workflows
        results = self.start_output_stream_and_listen(None, data_product_stream_ids, message_count_per_stream=5)

        # stop the flow of messages...
        self.process_dispatcher.cancel_process(ctd_sim_pid1) # kill the ctd simulator process - that is enough data
        self.process_dispatcher.cancel_process(ctd_sim_pid2)

        #Stop the first workflow processes
        self.workflowclient.terminate_data_process_workflow(workflow_id=workflow_id1,delete_data_products=False, timeout=60)  # Should test true at some point

        #Stop the second workflow processes
        self.workflowclient.terminate_data_process_workflow(workflow_id=workflow_id2,delete_data_products=False, timeout=60)  # Should test true at some point

        workflow_ids,_ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(workflow_ids) == 0 )

        #Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0 )

        aid_list = self.rrclient.find_associations(workflow_def_id, PRED.hasDataProcessDefinition)
        assertions(len(aid_list) == 0 )
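The tests above all follow one lifecycle: build a WorkflowDefinition, append DataProcessWorkflowStep objects, create and start the workflow against an input data product, listen on the output streams, then terminate and delete. A condensed sketch of that pattern, with procdef_id and input_product_id standing in for the ids produced by the helper methods:

    workflow_def_obj = IonObject(RT.WorkflowDefinition, name='Example_Workflow',
                                 description='condensed workflow lifecycle sketch')
    workflow_step_obj = IonObject('DataProcessWorkflowStep',
                                  data_process_definition_id=procdef_id)
    workflow_def_obj.workflow_steps.append(workflow_step_obj)
    workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj)
    workflow_id, product_id = self.workflowclient.create_data_process_workflow(
        workflow_def_id, input_product_id, timeout=60)
    # exercise the workflow, then tear it down
    self.workflowclient.terminate_data_process_workflow(workflow_id, False, timeout=60)
    self.workflowclient.delete_workflow_definition(workflow_def_id)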
Example #8
class QCPostProcessing(SimpleProcess):
    '''
    QC Post Processing Process

    This process gives ION clients and operators the ability to evaluate the automated quality-control flags on
    various data products. It should be run periodically with overlapping spans of data to ensure complete
    dataset QC verification.

    The parameters that this process accepts as configuration are:
        - dataset_id: The dataset identifier, required.
        - start_time: Unix timestamp, defaults to 24 hours in the past
        - end_time: Unix timestamp, defaults to current time
        - qc_params: a list of QC functions to evaluate; currently supported functions are ['glblrng_qc',
          'spketst_qc', 'stuckvl_qc']; defaults to all

    '''
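    # A hypothetical spawn configuration for this process; the key names mirror
    # the CFG lookups in on_start() below ('process.interval_key',
    # 'process.qc_params') and the values are illustrative only:
    #
    #     config = {'process': {'interval_key': 'qc_post_processing_interval',
    #                           'qc_params': ['glblrng_qc', 'spketst_qc']}}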

    qc_suffixes = ['glblrng_qc', 'spketst_qc', 'stuckvl_qc']
    def on_start(self):
        SimpleProcess.on_start(self)
        self.data_retriever = DataRetrieverServiceProcessClient(process=self)
        self.interval_key = self.CFG.get_safe('process.interval_key',None)
        self.qc_params    = self.CFG.get_safe('process.qc_params',[])
        validate_is_not_none(self.interval_key, 'An interval key is necessary to launch this process')
        self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True)
        self.add_endpoint(self.event_subscriber)
        self.resource_registry = self.container.resource_registry
        self.run_interval = self.CFG.get_safe('service.qc_processing.run_interval', 24)
    
    def _event_callback(self, *args, **kwargs):
        log.info('QC Post Processing Triggered')
        dataset_ids, _ = self.resource_registry.find_resources(restype=RT.Dataset, id_only=True)
        for dataset_id in dataset_ids:
            log.info('QC Post Processing for dataset %s', dataset_id)
            try:
                self.process(dataset_id)
            except BadRequest as e:
                if 'Problems reading from the coverage' in e.message:
                    log.error('Failed to read from dataset %s', dataset_id, exc_info=True)

    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        start_time = start_time or (now - (3600*(self.run_interval+1))) # Look back run_interval hours, plus one hour of overlap
        end_time   = end_time or now
        
        qc_params  = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes
        
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st,et in self.chop(int(start_time),int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et)
            try:
                granule = self.data_retriever.retrieve(dataset_id, query={'start_time':st, 'end_time':et})
            except BadRequest:
                data_products, _ = self.container.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
                for data_product in data_products:
                    log.exception('Failed to perform QC Post Processing on %s', data_product.name)
                    log.error('Calculated Start Time: %s', st)
                    log.error('Calculated End Time:   %s', et)
                raise
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val==0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field, timestamps.tolist(),{})



    def flag_qc_parameter(self, dataset_id, parameter, temporal_values, configuration):
        log.info('Flagging QC for %s', parameter)
        data_product_ids, _ = self.resource_registry.find_subjects(object=dataset_id, subject_type=RT.DataProduct, predicate=PRED.hasDataset, id_only=True)
        for data_product_id in data_product_ids:
            self.qc_publisher.publish_event(origin=data_product_id, qc_parameter=parameter, temporal_values=temporal_values, configuration=configuration)

    @classmethod
    def chop(cls, start_time, end_time):
        while start_time < end_time:
            yield (start_time, min(start_time+3600, end_time))
            start_time = min(start_time+3600, end_time)
        return
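For reference, chop() walks an interval in blocks of at most 3600 seconds, so QC is evaluated hour by hour; a quick check of the generator above (assuming QCPostProcessing is importable):

    >>> list(QCPostProcessing.chop(0, 7500))
    [(0, 3600), (3600, 7200), (7200, 7500)]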
Example #9
class VizTransformMatplotlibGraphs(TransformStreamPublisher, TransformEventListener, TransformStreamListener):

    """
    This class is used to instantiate worker processes that subscribe to data streams and convert
    incoming data from CDM format to Matplotlib graphs

    """
    output_bindings = ['graph_image_param_dict']
    event_timer_interval = None


    def on_start(self):
        #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG

        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)
        self.ssclient = SchedulerServiceProcessClient(process=self)
        self.rrclient = ResourceRegistryServiceProcessClient(process=self)
        self.data_retriever_client = DataRetrieverServiceProcessClient(process=self)
        self.dsm_client = DatasetManagementServiceProcessClient(process=self)
        self.pubsub_client = PubsubManagementServiceProcessClient(process=self)

        self.stream_info  = self.CFG.get_safe('process.publish_streams',{})
        self.stream_names = self.stream_info.keys()
        self.stream_ids   = self.stream_info.values()

        if not self.stream_names:
            raise BadRequest('MPL Transform has no output streams.')

        graph_time_periods = self.CFG.get_safe('graph_time_periods')

        # If this is meant to be an event-driven process, schedule an event to be generated every few minutes/hours
        self.event_timer_interval = self.CFG.get_safe('graph_gen_interval')
        if self.event_timer_interval:
            event_origin = "Interval_Timer_Matplotlib"
            sub = EventSubscriber(event_type="ResourceEvent", callback=self.interval_timer_callback, origin=event_origin)
            sub.start()

            self.interval_timer_id = self.ssclient.create_interval_timer(start_time="now", interval=self._str_to_secs(self.event_timer_interval),
                event_origin=event_origin, event_subtype="")

        super(VizTransformMatplotlibGraphs,self).on_start()

    # When the transform is used as a data process
    def recv_packet(self, packet, in_stream_route, in_stream_id):
        # Check whether the class instance was set up as an event-triggered transform. If so, skip the packet
        if self.event_timer_interval:
            return

        log.info('Received packet')
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(packet, params=self.get_stream_definition())
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(mpl_data_granule)

    def get_stream_definition(self):
        stream_id = self.stream_ids[0]
        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        return stream_def._id

    def process_event(self, msg, headers):

        return

    def interval_timer_callback(self, *args, **kwargs):
        # Find the input data product for this process
        in_dp_id = self.CFG.get_safe('in_dp_id')

        print " >>>>>>>>>>>>>> IN DP ID from cfg : ", in_dp_id

        # get the dataset_id associated with the data_product. Need it to do the data retrieval
        ds_ids,_ = self.rrclient.find_objects(in_dp_id, PRED.hasDataset, RT.Dataset, True)
        if ds_ids is None or not ds_ids:
            return None

        # Retrieve data for the specified time interval. Set up the query from the passed config first
        query = {}

        param_list_str = self.CFG.get_safe('parameters')
        if param_list_str:
            query['parameters'] = param_list_str.split(', ')
            # append time if not present in list of parameters
            if 'time' not in query['parameters']:
                query['parameters'].append('time')


        query['start_time'] = query['end_time'] = ntplib.system_to_ntp_time(time.time()) # Now
        query['stride_time'] = 1
        if self.CFG.get_safe('graph_time_period'):
            query['start_time'] = query['end_time'] - self._str_to_secs(self.CFG.get_safe('graph_time_period'))

        #print " >>>>>>>>>>>>>> QUERY = ", query

        #retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0],{'start_time':start_time,'end_time':end_time})
        retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0], query=query)

        # Add extra parameters from the config to the query; they are not needed by data retrieval itself
        if self.CFG.get_safe('resolution'):
            query['resolution'] = self.CFG.get_safe('resolution')

        # send the granule through the Algorithm code to get the matplotlib graphs
        mpl_pdict_id = self.dsm_client.read_parameter_dictionary_by_name('graph_image_param_dict',id_only=True)

        mpl_stream_def = self.pubsub_client.create_stream_definition('mpl', parameter_dictionary_id=mpl_pdict_id)
        fileName = self.CFG.get_safe('graph_time_period')
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(retrieved_granule, config=query, params=mpl_stream_def, fileName=fileName)

        if mpl_data_granule is None:
            return None

        # publish on all specified output streams
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(mpl_data_granule)

        return

    def _str_to_secs(self, time_period):
        # This method converts commonly used time periods to their equivalent in seconds
        # Separate the alphabetic and numeric parts of the time period
        time_n = time_period.lower().rstrip('abcdefghijklmnopqrstuvwxyz ')
        time_a = time_period.lower().lstrip('0123456789. ')

        # determine whether the user specified secs, mins, hours, days, weeks, months, or years
        factor = None
        if time_a == 'sec' or time_a == "secs" or time_a == 'second' or time_a == "seconds":
            factor = 1
        if time_a == "min" or time_a == "mins" or time_a == "minute" or time_a == "minutes":
            factor = 60
        if time_a == "hr" or time_a == "hrs" or time_a == "hour" or time_a == "hours":
            factor = 60 * 60
        if time_a == "day" or time_a == "days":
            factor = 60 * 60 * 24
        if time_a == "wk" or time_a == "wks" or time_a == "week" or time_a == "weeks":
            factor = 60 * 60 * 24 * 7
        if time_a == "mon" or time_a == "mons" or time_a == "month" or time_a == "months":
            factor = 60 * 60 * 24 * 30
        if time_a == "yr" or time_a == "yrs" or time_a == "year" or time_a == "years":
            factor = 60 * 60 * 24 * 365

        if factor is None:
            raise BadRequest('Unrecognized time period: %s' % time_period)
        time_period_secs = float(time_n) * factor
        return time_period_secs


    def on_quit(self):

        #Cancel the timer
        if hasattr(self, 'interval_timer_id'):
            self.ssclient.cancel_timer(self.interval_timer_id)

        super(VizTransformMatplotlibGraphs,self).on_quit()
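For reference, _str_to_secs() splits a period string such as '2 hrs' into numeric and alphabetic parts and multiplies by the matching unit length in seconds (months are approximated as 30 days and years as 365 days); a quick check, assuming an instance of the class above named mpl:

    >>> mpl._str_to_secs('2 hrs')
    7200.0
    >>> mpl._str_to_secs('1 week')
    604800.0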
Example #10
class QCPostProcessing(SimpleProcess):
    '''
    QC Post Processing Process

    This process gives ION clients and operators the ability to evaluate the automated quality-control flags on
    various data products. It should be run periodically with overlapping spans of data to ensure complete
    dataset QC verification.

    The parameters that this process accepts as configuration are:
        - dataset_id: The dataset identifier, required.
        - start_time: Unix timestamp, defaults to 24 hours in the past
        - end_time: Unix timestamp, defaults to current time
        - qc_params: a list of QC functions to evaluate; currently supported functions are ['glblrng_qc',
          'spketst_qc', 'stuckvl_qc']; defaults to all

    '''

    qc_suffixes = ['glblrng_qc', 'spketst_qc', 'stuckvl_qc']

    def on_start(self):
        SimpleProcess.on_start(self)
        self.data_retriever = DataRetrieverServiceProcessClient(process=self)
        self.interval_key = self.CFG.get_safe('process.interval_key', None)
        self.qc_params = self.CFG.get_safe('process.qc_params', [])
        validate_is_not_none(
            self.interval_key,
            'An interval key is necessary to launch this process')
        self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent,
                                                origin=self.interval_key,
                                                callback=self._event_callback,
                                                auto_delete=True)
        self.add_endpoint(self.event_subscriber)
        self.resource_registry = self.container.resource_registry
        self.run_interval = self.CFG.get_safe(
            'service.qc_processing.run_interval', 24)

    def _event_callback(self, *args, **kwargs):
        log.info('QC Post Processing Triggered')
        dataset_ids, _ = self.resource_registry.find_resources(
            restype=RT.Dataset, id_only=True)
        for dataset_id in dataset_ids:
            log.info('QC Post Processing for dataset %s', dataset_id)
            try:
                self.process(dataset_id)
            except BadRequest as e:
                if 'Problems reading from the coverage' in e.message:
                    log.error('Failed to read from dataset')

    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        start_time = start_time or (now - (3600 * (self.run_interval + 1)))  # Look back run_interval hours, plus one hour of overlap
        end_time = end_time or now

        qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st, et in self.chop(int(start_time), int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug(
                "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s})",
                dataset_id, st, et)
            granule = self.data_retriever.retrieve(dataset_id,
                                                   query={
                                                       'start_time': st,
                                                       'end_time': et
                                                   })
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [
                i for i in rdt.fields
                if any([i.endswith(j) for j in qc_params])
            ]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val == 0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field,
                                           timestamps.tolist(), {})

    def flag_qc_parameter(self, dataset_id, parameter, temporal_values,
                          configuration):
        log.info('Flagging QC for %s', parameter)
        data_product_ids, _ = self.resource_registry.find_subjects(
            object=dataset_id,
            subject_type=RT.DataProduct,
            predicate=PRED.hasDataset,
            id_only=True)
        for data_product_id in data_product_ids:
            self.qc_publisher.publish_event(origin=data_product_id,
                                            qc_parameter=parameter,
                                            temporal_values=temporal_values,
                                            configuration=configuration)

    @classmethod
    def chop(cls, start_time, end_time):
        while start_time < end_time:
            yield (start_time, min(start_time + 3600, end_time))
            start_time = min(start_time + 3600, end_time)
        return
Example #11
class VizTransformMatplotlibGraphs(TransformStreamPublisher,
                                   TransformEventListener,
                                   TransformStreamListener):
    """
    This class is used to instantiate worker processes that subscribe to data streams and convert
    incoming data from CDM format to Matplotlib graphs

    """
    output_bindings = ['graph_image_param_dict']
    event_timer_interval = None

    def on_start(self):
        #print ">>>>>>>>>>>>>>>>>>>>>> MPL CFG = ", self.CFG

        self.pubsub_management = PubsubManagementServiceProcessClient(
            process=self)
        self.ssclient = SchedulerServiceProcessClient(process=self)
        self.rrclient = ResourceRegistryServiceProcessClient(process=self)
        self.data_retriever_client = DataRetrieverServiceProcessClient(
            process=self)
        self.dsm_client = DatasetManagementServiceProcessClient(process=self)
        self.pubsub_client = PubsubManagementServiceProcessClient(process=self)

        self.stream_info = self.CFG.get_safe('process.publish_streams', {})
        self.stream_names = self.stream_info.keys()
        self.stream_ids = self.stream_info.values()

        if not self.stream_names:
            raise BadRequest('MPL Transform has no output streams.')

        graph_time_periods = self.CFG.get_safe('graph_time_periods')

        # If this is meant to be an event-driven process, schedule an event to be generated every few minutes/hours
        self.event_timer_interval = self.CFG.get_safe('graph_gen_interval')
        if self.event_timer_interval:
            event_origin = "Interval_Timer_Matplotlib"
            sub = EventSubscriber(event_type="ResourceEvent",
                                  callback=self.interval_timer_callback,
                                  origin=event_origin)
            sub.start()

            self.interval_timer_id = self.ssclient.create_interval_timer(
                start_time="now",
                interval=self._str_to_secs(self.event_timer_interval),
                event_origin=event_origin,
                event_subtype="")

        super(VizTransformMatplotlibGraphs, self).on_start()

    # When the transform is used as a data process
    def recv_packet(self, packet, in_stream_route, in_stream_id):
        # Check whether the class instance was set up as an event-triggered transform. If so, skip the packet
        if self.event_timer_interval:
            return

        log.info('Received packet')
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(
            packet, params=self.get_stream_definition())
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(mpl_data_granule)

    def get_stream_definition(self):
        stream_id = self.stream_ids[0]
        stream_def = self.pubsub_management.read_stream_definition(
            stream_id=stream_id)
        return stream_def._id

    def process_event(self, msg, headers):

        return

    def interval_timer_callback(self, *args, **kwargs):
        # Find the input data product for this process
        in_dp_id = self.CFG.get_safe('in_dp_id')

        print " >>>>>>>>>>>>>> IN DP ID from cfg : ", in_dp_id

        # get the dataset_id associated with the data_product. Need it to do the data retrieval
        ds_ids, _ = self.rrclient.find_objects(in_dp_id, PRED.hasDataset,
                                               RT.Dataset, True)
        if ds_ids is None or not ds_ids:
            return None

        # Retrieve data for the specified time interval. Set up the query from the passed config first
        query = {}

        param_list_str = self.CFG.get_safe('parameters')
        if param_list_str:
            query['parameters'] = param_list_str.split(', ')
            # append time if not present in list of parameters
            if 'time' not in query['parameters']:
                query['parameters'].append('time')

        query['start_time'] = query['end_time'] = ntplib.system_to_ntp_time(
            time.time())  # Now
        query['stride_time'] = 1
        if self.CFG.get_safe('graph_time_period'):
            query['start_time'] = query['end_time'] - self._str_to_secs(
                self.CFG.get_safe('graph_time_period'))

        #print " >>>>>>>>>>>>>> QUERY = ", query

        #retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0],{'start_time':start_time,'end_time':end_time})
        retrieved_granule = self.data_retriever_client.retrieve(ds_ids[0],
                                                                query=query)

        # Add extra parameters from the config to the query; they are not needed by data retrieval itself
        if self.CFG.get_safe('resolution'):
            query['resolution'] = self.CFG.get_safe('resolution')

        # send the granule through the Algorithm code to get the matplotlib graphs
        mpl_pdict_id = self.dsm_client.read_parameter_dictionary_by_name(
            'graph_image_param_dict', id_only=True)

        mpl_stream_def = self.pubsub_client.create_stream_definition(
            'mpl', parameter_dictionary_id=mpl_pdict_id)
        fileName = self.CFG.get_safe('graph_time_period')
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(
            retrieved_granule,
            config=query,
            params=mpl_stream_def,
            fileName=fileName)

        if mpl_data_granule is None:
            return None

        # publish on all specified output streams
        for stream_name in self.stream_names:
            publisher = getattr(self, stream_name)
            publisher.publish(mpl_data_granule)

        return

    def _str_to_secs(self, time_period):
        # This method converts commonly used time periods to their equivalent in seconds
        # Separate the alphabetic and numeric parts of the time period
        time_n = time_period.lower().rstrip('abcdefghijklmnopqrstuvwxyz ')
        time_a = time_period.lower().lstrip('0123456789. ')

        # determine whether the user specified secs, mins, hours, days, weeks, months, or years
        factor = None
        if time_a == 'sec' or time_a == "secs" or time_a == 'second' or time_a == "seconds":
            factor = 1
        if time_a == "min" or time_a == "mins" or time_a == "minute" or time_a == "minutes":
            factor = 60
        if time_a == "hr" or time_a == "hrs" or time_a == "hour" or time_a == "hours":
            factor = 60 * 60
        if time_a == "day" or time_a == "days":
            factor = 60 * 60 * 24
        if time_a == "wk" or time_a == "wks" or time_a == "week" or time_a == "weeks":
            factor = 60 * 60 * 24 * 7
        if time_a == "mon" or time_a == "mons" or time_a == "month" or time_a == "months":
            factor = 60 * 60 * 24 * 30
        if time_a == "yr" or time_a == "yrs" or time_a == "year" or time_a == "years":
            factor = 60 * 60 * 24 * 365

        if factor is None:
            raise BadRequest('Unrecognized time period: %s' % time_period)
        time_period_secs = float(time_n) * factor
        return time_period_secs

    def on_quit(self):

        #Cancel the timer
        if hasattr(self, 'interval_timer_id'):
            self.ssclient.cancel_timer(self.interval_timer_id)

        super(VizTransformMatplotlibGraphs, self).on_quit()
Example #12
class TestWorkflowManagementIntegration(VisualizationIntegrationTestHelper):
    def setUp(self):
        # Start container

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # simulate preloading
        preload_ion_params(self.container)

        #Instantiate a process to represent the test
        process = WorkflowServiceTestProcess()

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceProcessClient(
            node=self.container.node, process=process)
        self.damsclient = DataAcquisitionManagementServiceProcessClient(
            node=self.container.node, process=process)
        self.pubsubclient = PubsubManagementServiceProcessClient(
            node=self.container.node, process=process)
        self.ingestclient = IngestionManagementServiceProcessClient(
            node=self.container.node, process=process)
        self.imsclient = InstrumentManagementServiceProcessClient(
            node=self.container.node, process=process)
        self.dataproductclient = DataProductManagementServiceProcessClient(
            node=self.container.node, process=process)
        self.dataprocessclient = DataProcessManagementServiceProcessClient(
            node=self.container.node, process=process)
        self.datasetclient = DatasetManagementServiceProcessClient(
            node=self.container.node, process=process)
        self.workflowclient = WorkflowManagementServiceProcessClient(
            node=self.container.node, process=process)
        self.process_dispatcher = ProcessDispatcherServiceProcessClient(
            node=self.container.node, process=process)
        self.data_retriever = DataRetrieverServiceProcessClient(
            node=self.container.node, process=process)

        self.ctd_stream_def = SBE37_CDM_stream_definition()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    def test_SA_transform_components(self):

        assertions = self.assertTrue

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product(
        )
        data_product_stream_ids.append(ctd_stream_id)

        ###
        ###  Setup the first transformation
        ###

        # Salinity: Data Process Definition
        ctd_L2_salinity_dprocdef_id = self.create_salinity_data_process_definition(
        )

        l2_salinity_all_data_process_id, ctd_l2_salinity_output_dp_id = self.create_transform_process(
            ctd_L2_salinity_dprocdef_id, ctd_parsed_data_product_id,
            'salinity')

        ## get the stream id for the transform outputs
        stream_ids, _ = self.rrclient.find_objects(
            ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0)
        sal_stream_id = stream_ids[0]
        data_product_stream_ids.append(sal_stream_id)

        ###
        ###  Setup the second transformation
        ###

        # Salinity Doubler: Data Process Definition
        salinity_doubler_dprocdef_id = self.create_salinity_doubler_data_process_definition(
        )

        salinity_double_data_process_id, salinity_doubler_output_dp_id = self.create_transform_process(
            salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id,
            'salinity')

        stream_ids, _ = self.rrclient.find_objects(
            salinity_doubler_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0)
        sal_dbl_stream_id = stream_ids[0]
        data_product_stream_ids.append(sal_dbl_stream_id)

        #Start the output stream listener to monitor and collect messages
        results = self.start_output_stream_and_listen(ctd_stream_id,
                                                      data_product_stream_ids)

        #Stop the transform processes
        self.dataprocessclient.deactivate_data_process(
            salinity_double_data_process_id)
        self.dataprocessclient.deactivate_data_process(
            l2_salinity_all_data_process_id)

        # Validate the data from each of the messages along the way
        self.validate_messages(results)

    @attr('LOCOINT')
    @attr('SMOKE')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    def test_transform_workflow(self):

        assertions = self.assertTrue

        log.debug("Building the workflow definition")

        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Salinity_Test_Workflow',
            description='Tests a workflow of multiple transform data processes')

        workflow_data_product_name = 'TEST-Workflow_Output_Product'  # Set a specific output product name

        #-------------------------------------------------------------------------------------------------------------------------
        log.debug("Adding a transformation process definition for salinity")
        #-------------------------------------------------------------------------------------------------------------------------

        ctd_L2_salinity_dprocdef_id = self.create_salinity_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=ctd_L2_salinity_dprocdef_id,
            persist_process_output_data=False)  # Don't persist the intermediate data product
        workflow_def_obj.workflow_steps.append(workflow_step_obj)
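        # Steps are chained in append order: the salinity step's output feeds
        # the salinity-doubler step added next, mirroring the manual pipeline
        # in test_SA_transform_components above.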

        #-------------------------------------------------------------------------------------------------------------------------
        log.debug(
            "Adding a transformation process definition for salinity doubler")
        #-------------------------------------------------------------------------------------------------------------------------

        salinity_doubler_dprocdef_id = self.create_salinity_doubler_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=salinity_doubler_dprocdef_id)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        log.debug("Creating workflow def in the resource registry")
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        aids = self.rrclient.find_associations(workflow_def_id,
                                               PRED.hasDataProcessDefinition)
        assertions(len(aids) == 2)
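        # create_workflow_definition presumably registers one
        # hasDataProcessDefinition association per step, hence exactly two.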

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        log.debug("Creating the input data product")
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        log.debug("Creating and starting the workflow")
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id,
            ctd_parsed_data_product_id,
            persist_workflow_data_product=True,
            output_data_product_name=workflow_data_product_name,
            timeout=300)

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)
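        # find_subjects searches the association in the reverse direction:
        # which Workflow resources point at this product via hasOutputProduct.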

        log.debug("persisting the output product")
        #self.dataproductclient.activate_data_product_persistence(workflow_product_id)
        dataset_ids, _ = self.rrclient.find_objects(workflow_product_id,
                                                    PRED.hasDataset,
                                                    RT.Dataset, True)
        assertions(len(dataset_ids) == 1)
        dataset_id = dataset_ids[0]

        log.debug(
            "Verifying the output data product name matches what was specified in the workflow definition"
        )
        workflow_product = self.rrclient.read(workflow_product_id)
        assertions(
            workflow_product.name.startswith(workflow_data_product_name),
            'Output product name %s does not start with %s' %
            (workflow_product.name, workflow_data_product_name))

        log.debug(
            "Walking the associations to find the appropriate output data streams to validate the messages"
        )

        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 2)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        log.debug("data_product_stream_ids: %s" % data_product_stream_ids)

        log.debug(
            "Starting the output stream listener to monitor and collect messages")
        results = self.start_output_stream_and_listen(ctd_stream_id,
                                                      data_product_stream_ids)

        log.debug("results::: %s" % results)

        log.debug("Stopping the workflow processes")
        self.workflowclient.terminate_data_process_workflow(
            workflow_id, False, timeout=250)  # TODO: should also test with delete_data_products=True

        log.debug("Making sure the Workflow object was removed")
        objs, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(objs) == 0)

        log.debug(
            "Validating the data from each of the messages along the way")
        self.validate_messages(results)

        log.debug(
            "Checking that dataset id %s was persisted and can be retrieved"
            % dataset_id)
        self.validate_data_ingest_retrieve(dataset_id)
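        # validate_data_ingest_retrieve presumably performs a retrieval along
        # the lines of:
        #     granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
        # as done explicitly in test_mpl_graphs_transform_workflow below.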

        log.debug("Cleaning up to make sure delete is correct.")
        self.workflowclient.delete_workflow_definition(workflow_def_id)
        """
        workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0 )
        """

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    def test_highcharts_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='HighCharts_Test_Workflow',
            description='Tests the workflow of converting stream data to HighCharts')

        # Add a transformation process definition
        highcharts_procdef_id = self.create_highcharts_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=highcharts_procdef_id,
            persist_process_output_data=False)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)
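        # A single-step workflow: the HighCharts transform is the only stage,
        # so exactly one output data product is expected further down.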

        # Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        # Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id, timeout=60)

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)

        # Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 1)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        # Start the output stream listener to monitor and collect messages
        results = self.start_output_stream_and_listen(ctd_stream_id,
                                                      data_product_stream_ids)

        # Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id=workflow_id, delete_data_products=False,
            timeout=60)  # TODO: should also test with delete_data_products=True

        # Validate the data from each of the messages along the way
        self.validate_highcharts_transform_results(results)

        # Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)
        # Disabled check: verify the workflow definition was removed
        #workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        #assertions(len(workflow_def_ids) == 0)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    def test_mpl_graphs_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Mpl_Graphs_Test_Workflow',
            description='Tests the workflow of converting stream data to Matplotlib graphs')

        # Add a transformation process definition
        mpl_graphs_procdef_id = self.create_mpl_graphs_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=mpl_graphs_procdef_id,
            persist_process_output_data=False)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        # Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        # Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id,
            ctd_parsed_data_product_id,
            persist_workflow_data_product=True,
            timeout=60)
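        # persist_workflow_data_product=True ensures the final output product
        # is ingested; the dataset retrieval check at the end of this test
        # relies on it.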

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)

        # Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 1)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        # Start the output stream listener to monitor and collect messages
        results = self.start_output_stream_and_listen(ctd_stream_id,
                                                      data_product_stream_ids)

        # Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id=workflow_id, delete_data_products=False,
            timeout=60)  # TODO: should also test with delete_data_products=True

        # Validate the data from each of the messages along the way
        self.validate_mpl_graphs_transform_results(results)

        # Check that ingestion worked by extracting granules via data retrieval.
        # First find the dataset associated with the output data product
        ds_ids, _ = self.rrclient.find_objects(
            workflow_dp_ids[-1], PRED.hasDataset, RT.Dataset, True)
        retrieved_granule = self.data_retriever.retrieve_last_data_points(
            ds_ids[0], 10)

        # Validate the data in the retrieved granule
        self.validate_mpl_graphs_transform_results(retrieved_granule)

        # Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)
        # Disabled check: verify the workflow definition was removed
        #workflow_def_ids,_ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        #assertions(len(workflow_def_ids) == 0)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    def test_multiple_workflow_instances(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Multiple_Test_Workflow',
            description='Tests the workflow of converting stream data')

        # Add a transformation process definition
        highcharts_procdef_id = self.create_highcharts_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=highcharts_procdef_id,
            persist_process_output_data=False)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        # Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the first input data product
        ctd_stream_id1, ctd_parsed_data_product_id1 = self.create_ctd_input_stream_and_data_product(
            'ctd_parsed1')
        data_product_stream_ids.append(ctd_stream_id1)

        # Create and start the first workflow
        workflow_id1, workflow_product_id1 = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id1, timeout=60)

        # Create the second input data product
        ctd_stream_id2, ctd_parsed_data_product_id2 = self.create_ctd_input_stream_and_data_product(
            'ctd_parsed2')
        data_product_stream_ids.append(ctd_stream_id2)

        # Create and start the second workflow
        workflow_id2, workflow_product_id2 = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id2, timeout=60)
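        # The same workflow definition is reused here: each call to
        # create_data_process_workflow spins up an independent instance.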

        # Verify that both workflow instances were registered
        workflow_ids, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(workflow_ids) == 2)

        # Start the first input stream process
        ctd_sim_pid1 = self.start_sinusoidal_input_stream_process(
            ctd_stream_id1)

        # Start the second input stream process
        ctd_sim_pid2 = self.start_simple_input_stream_process(ctd_stream_id2)
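        # Two different simulators (sinusoidal vs. simple) are used,
        # presumably so the two workflow instances receive distinguishable
        # input data.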

        # Start the output stream listener to monitor a set number of messages being sent through the workflows
        results = self.start_output_stream_and_listen(
            None, data_product_stream_ids, message_count_per_stream=5)
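        # Passing None as the input stream presumably means no new input
        # process is started; the listener only waits for 5 messages on each
        # monitored stream, since both simulators are already running.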

        # Stop the flow of messages: kill the ctd simulator processes
        # (that is enough data)
        self.process_dispatcher.cancel_process(ctd_sim_pid1)
        self.process_dispatcher.cancel_process(ctd_sim_pid2)

        # Stop the first workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id=workflow_id1, delete_data_products=False,
            timeout=60)  # TODO: should also test with delete_data_products=True

        # Stop the second workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id=workflow_id2, delete_data_products=False,
            timeout=60)  # TODO: should also test with delete_data_products=True

        workflow_ids, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(workflow_ids) == 0)

        # Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)
        """