Example #1
    def test_read_and_update_subscription(self):
        # Mocks
        subscription_obj = Subscription()
        subscription_obj.query = StreamQuery(['789'])
        subscription_obj.is_active = False
        subscription_obj.subscription_type = SubscriptionTypeEnum.STREAM_QUERY
        self.mock_read.return_value = subscription_obj

        self.mock_find_objects.return_value = (['789'], ['This here is an association'])
        self.mock_update.return_value = ('not important', 'even less so')
        # Execution
        query = StreamQuery(['123'])
        retval = self.pubsub_service.update_subscription('subscription_id', query)

        # Assertions
        self.mock_read.assert_called_once_with('subscription_id', '')
        self.mock_find_objects.assert_called_once_with('subscription_id', PRED.hasStream, '', True)
        self.mock_delete_association.assert_called_once_with('This here is an association')
        self.mock_create_association.assert_called_once_with('subscription_id', PRED.hasStream, '123', None)
        self.assertTrue(self.mock_update.call_count == 1, 'update was not called')
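
The mocks above pin down the call sequence expected inside update_subscription: read the existing subscription, find the current hasStream associations, delete them, create associations for the streams in the new query, and update the resource. A minimal sketch of a service method that would satisfy these assertions (the registry client name and attribute access are assumptions, not the actual pyon implementation):

    def update_subscription(self, subscription_id='', query=None):
        # Read the existing subscription resource (self.mock_read in the test above)
        subscription = self.clients.resource_registry.read(subscription_id, '')

        # Drop the associations to the streams the subscription currently references
        stream_ids, assocs = self.clients.resource_registry.find_objects(
            subscription_id, PRED.hasStream, '', True)
        for assoc in assocs:
            self.clients.resource_registry.delete_association(assoc)

        # Associate the subscription with the streams named in the new query
        for stream_id in query.stream_ids:
            self.clients.resource_registry.create_association(
                subscription_id, PRED.hasStream, stream_id, None)

        # Persist the updated query on the subscription resource
        subscription.query = query
        self.clients.resource_registry.update(subscription)
        return True
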
    def _start_output_stream_listener(self,
                                      data_product_stream_ids,
                                      message_count_per_stream=10):

        cc = self.container
        assertions = self.assertTrue

        ###
        ### Make a subscriber in the test to listen for transformed data
        ###
        salinity_subscription_id = self.pubsubclient.create_subscription(
            query=StreamQuery(data_product_stream_ids),
            exchange_name='workflow_test',
            name="test workflow transformations",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            log.warn('Data received!')
            results.append(message)
            # Only wait for so many messages per stream
            if len(results) >= len(data_product_stream_ids) * message_count_per_stream:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='workflow_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        self.pubsubclient.activate_subscription(
            subscription_id=salinity_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=30))

        self.pubsubclient.deactivate_subscription(
            subscription_id=salinity_subscription_id)

        subscriber.stop()

        return results
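
A hedged usage sketch of the helper above, showing the ordering its comments rely on: the subscriber (and therefore the queue) is started before the subscription is activated, so no messages are dropped. The stream ids and the expected message count are placeholders:

    # Hypothetical call from a workflow test; stream ids come from the products under test
    stream_ids = [in_stream_id, out_stream_id]          # placeholders
    messages = self._start_output_stream_listener(
        data_product_stream_ids=stream_ids,
        message_count_per_stream=5)

    # The helper returns every message seen before its AsyncResult fired
    self.assertTrue(len(messages) >= len(stream_ids) * 5)
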
Example #3
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        self.pubsub_cli = PubsubManagementServiceClient(node=self.container.node)

        self.ctd_stream1_id = self.pubsub_cli.create_stream(name="SampleStream1",
                                                            description="Sample Stream 1 Description")

        self.ctd_stream2_id = self.pubsub_cli.create_stream(name="SampleStream2",
                                                            description="Sample Stream 2 Description")

        # Make a subscription to two input streams
        exchange_name = "a_queue"
        query = StreamQuery([self.ctd_stream1_id, self.ctd_stream2_id])

        self.ctd_subscription_id = self.pubsub_cli.create_subscription(query,
                                                                       exchange_name,
                                                                       "SampleSubscription",
                                                                       "Sample Subscription Description")

        # Make a subscription to all streams on an exchange point
        exchange_name = "another_queue"
        query = ExchangeQuery()

        self.exchange_subscription_id = self.pubsub_cli.create_subscription(query,
            exchange_name,
            "SampleExchangeSubscription",
            "Sample Exchange Subscription Description")


        pid = self.container.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = self.container.proc_manager.procs[pid]


        # Normally the user does not see or create the publisher; this is part of the container's business.
        # For the test we need to set it up explicitly (a publish/subscribe round-trip sketch follows this setUp).
        publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=self.container.node)

        self.ctd_stream1_publisher = publisher_registrar.create_publisher(stream_id=self.ctd_stream1_id)

        self.ctd_stream2_publisher = publisher_registrar.create_publisher(stream_id=self.ctd_stream2_id)


        # Use the dummy process for the subscriber registrar - the process is not really used for anything here...
        self.stream_subscriber = StreamSubscriberRegistrar(process=dummy_process, node=self.container.node)
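
With the publishers and the subscriber registrar from this setUp in place, a test can do a simple round trip; a minimal sketch, assuming gevent is imported and using the 'a_queue' subscription created above:

    def test_round_trip(self):
        received = gevent.queue.Queue()

        subscriber = self.stream_subscriber.create_subscriber(
            exchange_name='a_queue', callback=lambda message, headers: received.put(message))
        subscriber.start()

        # Activate only after the subscriber's queue exists
        self.pubsub_cli.activate_subscription(self.ctd_subscription_id)

        self.ctd_stream1_publisher.publish('hello')
        self.assertEqual(received.get(timeout=5), 'hello')

        subscriber.stop()
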
    def setUp(self):
        # set up the container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        self.pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        self.tms_cli = TransformManagementServiceClient(node=self.container.node)
        self.rr_cli = ResourceRegistryServiceClient(node=self.container.node)
        self.procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        self.input_stream_id = self.pubsub_cli.create_stream(name='input_stream', original=True)

        self.input_subscription_id = self.pubsub_cli.create_subscription(
            query=StreamQuery(stream_ids=[self.input_stream_id]),
            exchange_name='transform_input',
            name='input_subscription')

        self.output_stream_id = self.pubsub_cli.create_stream(name='output_stream', original=True)

        self.process_definition = ProcessDefinition(name='basic_transform_definition')
        self.process_definition.executable = {'module': 'ion.processes.data.transforms.transform_example',
                                              'class':'TransformExample'}
        self.process_definition_id = self.procd_cli.create_process_definition(process_definition=self.process_definition)
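
The fixtures above (input subscription, output stream, process definition) are the three ingredients a transform needs; a minimal sketch of a test that wires them together with the TransformManagementServiceClient created in this setUp (the transform name and the final assertion are illustrative):

    def test_activate_basic_transform(self):
        transform_id = self.tms_cli.create_transform(
            name='basic_transform',
            in_subscription_id=self.input_subscription_id,
            out_streams={'output': self.output_stream_id},
            process_definition_id=self.process_definition_id,
            configuration={})
        self.tms_cli.activate_transform(transform_id)

        # The transform resource should now be readable from the resource registry
        transform_obj = self.rr_cli.read(transform_id)
        self.assertEqual(transform_obj.name, 'basic_transform')
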
Example #5
    def _start_data_subscribers(self):
        """
        Create a stream, a stream definition and an activated subscription for
        every stream named in the driver's PACKET_CONFIG, wiring each queue to
        the consume_data callback.
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume_data(message, headers):
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            if self._no_samples and self._no_samples == len(self._samples_received):
                self._async_data_result.set()
                
        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                node=self.container.node)

        # Create streams and subscriptions for each stream named in driver.
        self._stream_config = {}
        self._data_subscribers = []
        for (stream_name, val) in PACKET_CONFIG.iteritems():
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = pubsub_client.create_stream_definition(
                                                    container=stream_def)        
            stream_id = pubsub_client.create_stream(
                        name=stream_name,
                        stream_definition_id=stream_def_id,
                        original=True,
                        encoding='ION R2')
            self._stream_config[stream_name] = stream_id
            
            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name,
                                                         callback=consume_data)
            self._listen(sub)
            self._data_subscribers.append(sub)
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = pubsub_client.create_subscription(
                query=query, exchange_name=exchange_name)
            pubsub_client.activate_subscription(sub_id)
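
A hedged sketch of how a test built on the helper above typically waits for data. The attributes _no_samples, _samples_received and _async_data_result are assumed to be initialized elsewhere in the test class (a counter, a list and a gevent AsyncResult), since the consume_data callback sets the result once enough samples have arrived:

    # Assumed to be initialized in setUp (not shown in this snippet)
    self._samples_received = []
    self._async_data_result = gevent.event.AsyncResult()

    self._no_samples = 3              # wait for three data messages
    self._start_data_subscribers()

    # ... drive the instrument/publisher so that it emits data ...

    # Block until consume_data() has seen three messages, or fail after 30 s
    self._async_data_result.get(timeout=30)
    self.assertEqual(len(self._samples_received), self._no_samples)
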
    def on_start(self):

        pubsub_cli = PubsubManagementServiceProcessClient(
            process=self, node=self.container.node)

        # Get the stream(s)
        stream_id = self.CFG.get_safe('process.stream_id', '')

        query = StreamQuery(stream_ids=[
            stream_id,
        ])

        exchange_name = 'dispatcher_%s' % self.id

        subscription_id = pubsub_cli.create_subscription(
            query=query,
            exchange_name=exchange_name,
            name="SampleSubscription",
            description="Sample Subscription Description")

        stream_subscriber = StreamSubscriberRegistrar(process=self,
                                                      node=self.container.node)

        def message_received(granule, h):

            rdt = RecordDictionaryTool.load_from_granule(granule)

            log.warn(
                'Logging Record Dictionary received in logger subscription  \n%s',
                rdt.pretty_print())

        subscriber = stream_subscriber.create_subscriber(
            exchange_name=exchange_name, callback=message_received)
        subscriber.start()

        pubsub_cli.activate_subscription(subscription_id)
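
This process reads its stream id from 'process.stream_id' in its configuration; a hedged sketch of spawning it from a test, following the spawn_process pattern used elsewhere in these examples (the module path, class name and stream id are placeholders):

    configuration = {'process': {'stream_id': ctd_stream_id}}     # stream to log
    pid = self.container.spawn_process(
        name='granule_logger',
        module='ion.processes.data.example_dispatcher',   # hypothetical module path
        cls='ExampleDispatcher',                           # hypothetical class name
        config=configuration)
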
    def on_start(self):

        rr_cli = ResourceRegistryServiceProcessClient(process=self,
                                                      node=self.container.node)
        pubsub_cli = PubsubManagementServiceProcessClient(
            process=self, node=self.container.node)

        # Get the stream(s)
        data_product_id = self.CFG.get_safe('dispatcher.data_product_id', '')

        stream_ids, _ = rr_cli.find_objects(subject=data_product_id,
                                            predicate=PRED.hasStream,
                                            id_only=True)

        log.info('Got Stream Ids: "%s"', stream_ids)
        assert stream_ids, 'No streams found for this data product!'

        query = StreamQuery(stream_ids=stream_ids)

        exchange_name = 'dispatcher_%s' % str(os.getpid())

        subscription_id = pubsub_cli.create_subscription(
            query=query,
            exchange_name=exchange_name,
            name="SampleSubscription",
            description="Sample Subscription Description")

        stream_subscriber = StreamSubscriberRegistrar(process=self,
                                                      node=self.container.node)

        stream_defs = {}

        def message_received(granule, h):

            stream_id = granule.stream_resource_id

            data_stream_id = granule.data_stream_id
            data_stream = granule.identifiables[data_stream_id]

            tstamp = get_datetime(data_stream.timestamp.value)

            records = granule.identifiables['record_count'].value

            log.info(
                'Received a message from stream %s with time stamp %s and %d records'
                % (stream_id, tstamp, records))

            if stream_id not in stream_defs:
                stream_defs[stream_id] = pubsub_cli.find_stream_definition(
                    stream_id, id_only=False).container
            stream_def = stream_defs.get(stream_id)

            sp = PointSupplementStreamParser(stream_definition=stream_def,
                                             stream_granule=granule)

            last_data = {}
            for field in sp.list_field_names():
                last_data[field] = sp.get_values(field)[-1]

            log.info('Last values in the message: %s' % str(last_data))

        subscriber = stream_subscriber.create_subscriber(
            exchange_name=exchange_name, callback=message_received)
        subscriber.start()

        pubsub_cli.activate_subscription(subscription_id)
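
This variant resolves its streams from a data product instead of taking a stream id directly, so the spawn configuration carries 'dispatcher.data_product_id'; a sketch under the same assumptions as above:

    configuration = {'dispatcher': {'data_product_id': data_product_id}}
    pid = self.container.spawn_process(
        name='data_product_dispatcher',
        module='ion.processes.data.example_dispatcher',   # hypothetical module path
        cls='DataProductDispatcher',                       # hypothetical class name
        config=configuration)
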
    def on_start(self):

        # The data dictionary object holds a copy of all the viz products created by the service. The viz
        # products are indexed by viz_product_type (google_dt, google_realtime_dt or matplotlib_graphs)
        # and by data_product_id. A sketch of the resulting layout follows this method.
        self.viz_data_dictionary = {}
        self.viz_data_dictionary['google_dt'] = {}
        self.viz_data_dictionary['google_realtime_dt'] = {}
        self.viz_data_dictionary['matplotlib_graphs'] = {}
        # Kind of redundant but we will maintain a separate list of data product_ids registered with the viz_service
        self.data_products = []

        # Create clients to interface with PubSub, Transform Management Service and Resource Registry
        self.pubsub_cli = self.clients.pubsub_management
        self.tms_cli = self.clients.transform_management
        self.rr_cli = self.clients.resource_registry
        self.dr_cli = self.clients.data_retriever
        self.dsm_cli = self.clients.dataset_management
        """
        # Create process definitions which will used to spawn off the transform processes
        self.matplotlib_proc_def = IonObject(RT.ProcessDefinition, name='viz_transform_process'+'.'+self.random_id_generator())
        self.matplotlib_proc_def.executable = {
            'module': 'ion.services.ans.visualization_service',
            'class':'VizTransformProcForMatplotlibGraphs'
        }
        self.matplotlib_proc_def_id, _ = self.rr_cli.create(self.matplotlib_proc_def)

        self.google_dt_proc_def = IonObject(RT.ProcessDefinition, name='viz_transform_process'+'.'+self.random_id_generator())
        self.google_dt_proc_def.executable = {
            'module': 'ion.services.ans.visualization_service',
            'class':'VizTransformProcForGoogleDT'
        }
        self.google_dt_proc_def_id, _ = self.rr_cli.create(self.google_dt_proc_def)
        """

        # Query resource registry to get process definitions and streams ids made by the bootstrap
        proc_def_ids, _ = self.rr_cli.find_resources(
            restype=RT.ProcessDefinition,
            lcstate=None,
            name="viz_matplotlib_transform_process",
            id_only=True)
        self.matplotlib_proc_def_id = proc_def_ids[0]

        proc_def_ids, _ = self.rr_cli.find_resources(
            restype=RT.ProcessDefinition,
            lcstate=None,
            name="viz_google_dt_transform_process",
            id_only=True)
        self.google_dt_proc_def_id = proc_def_ids[0]

        # Create a stream that all the transform processes will use to submit data back to the viz service
        self.viz_service_submit_stream_id = self.pubsub_cli.create_stream(
            name="visualization_service_submit_stream." +
            self.random_id_generator())

        # subscribe to this stream since all the results from transforms will be submitted here
        query = StreamQuery(stream_ids=[
            self.viz_service_submit_stream_id,
        ])
        self.viz_service_submit_stream_sub_id = self.pubsub_cli.create_subscription(
            query=query, exchange_name="visualization_service_submit_queue")
        submit_stream_subscriber_registrar = StreamSubscriberRegistrar(
            process=self.container, node=self.container.node)
        submit_stream_subscriber = submit_stream_subscriber_registrar.create_subscriber(
            exchange_name='visualization_service_submit_queue',
            callback=self.process_submission)
        submit_stream_subscriber.start()

        self.pubsub_cli.activate_subscription(
            self.viz_service_submit_stream_sub_id)

        # Discover the existing data_product_ids active in the system
        sys_prod_ids, _ = self.rr_cli.find_resources(RT.DataProduct, None,
                                                     None, True)

        # Register all the streams in the system, which will in turn start transform processes
        for dp_id in sys_prod_ids:
            self.register_new_data_product(dp_id)

        # listen for events when new data_products show up
        self.event_subscriber = EventSubscriber(
            event_type="ResourceModifiedEvent",
            origin_type="DataProduct",
            sub_type="UPDATE",
            callback=self.receive_new_dataproduct_event)

        self.event_subscriber.activate()

        return
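
For reference, the layout of viz_data_dictionary described in the comments above ends up looking roughly like this once data products are registered and a one-shot Google DT request is in flight (a sketch of the shape, not literal service state):

    viz_data_dictionary = {
        'matplotlib_graphs': {
            '<data_product_id>': {'transform_proc': '<transform_id>',
                                  'list_of_images': []},
        },
        'google_realtime_dt': {
            '<data_product_id>': {'transform_proc': '<transform_id>',
                                  'data_table': []},
        },
        'google_dt': {
            '<data_product_id>.<token>': {'transform_proc': '<transform_id>',
                                          'data_table': [],
                                          'ready_flag': False},
        },
    }
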
    def register_new_data_product(self, data_product_id=''):
        """Apprise the Visualization service of a new data product in the system. This function inits transform
        processes for generating the matplotlib graphs of the new data product. It also creates transform processes which
        generate Google data-tables for the real-time streams (sliding window) coming in from the instruments.

        @param data_product_id    str
        @throws BadRequest    check data_product_id for duplicates
        """

        # Check to see if the DP has already been registered. If yes, do nothing
        if (data_product_id in self.viz_data_dictionary['matplotlib_graphs']
            ) or (data_product_id
                  in self.viz_data_dictionary['google_realtime_dt']):
            log.warn(
                "Data Product has already been registered with Visualization service. Ignoring."
            )
            return

        # extract the stream_id associated with the data_product_id
        viz_stream_id, _ = self.rr_cli.find_objects(data_product_id,
                                                    PRED.hasStream, None, True)

        if not viz_stream_id:
            log.warn("Visualization_service: viz_stream_id is empty")
            return

        viz_stream_def_id = self.pubsub_cli.find_stream_definition(
            stream_id=viz_stream_id[0], id_only=True)

        # Go ahead only if the data product is unique
        if data_product_id in self.data_products:
            raise BadRequest("Data product %s is already registered with the Visualization service" % data_product_id)
        self.data_products.append(data_product_id)

        # init the space needed to store matplotlib_graphs and realtime Google data tables

        # For the matplotlib graphs, the list_of_images stores the names of the image files. The actual binary data for the
        # images is also stored in the same dictionary as {img_name1: binary_data1, img_name2: binary_data2 .. etc}
        self.viz_data_dictionary['matplotlib_graphs'][data_product_id] = {
            'transform_proc': "",
            'list_of_images': []
        }

        # The 'data_table' key points to a JSON string
        self.viz_data_dictionary['google_realtime_dt'][data_product_id] = {
            'transform_proc': "",
            'data_table': []
        }

        ###############################################################################
        # Create transform process for the matplotlib graphs.
        ###############################################################################

        # Create the subscription to the stream. This will be passed as parameter to the transform worker
        #query1 = StreamQuery(stream_ids=[viz_stream_id,])
        query1 = StreamQuery(viz_stream_id)
        viz_subscription_id1 = self.pubsub_cli.create_subscription(
            query=query1,
            exchange_name='viz_data_exchange.' + self.random_id_generator())

        # maybe this is a good place to pass the couch DB table to use and other parameters
        configuration1 = {
            "stream_def_id": viz_stream_def_id,
            "data_product_id": data_product_id
        }

        # Launch the viz transform process
        viz_transform_id1 = self.tms_cli.create_transform(
            name='viz_transform_matplotlib_' + self.random_id_generator() +
            '.' + data_product_id,
            in_subscription_id=viz_subscription_id1,
            out_streams={
                "visualization_service_submit_stream_id":
                self.viz_service_submit_stream_id
            },
            process_definition_id=self.matplotlib_proc_def_id,
            configuration=configuration1)
        self.tms_cli.activate_transform(viz_transform_id1)

        # keep a record of the viz_transform_id
        self.viz_data_dictionary['matplotlib_graphs'][data_product_id][
            'transform_proc'] = viz_transform_id1

        ###############################################################################
        # Create transform process for the Google realtime datatables
        ###############################################################################

        # Create the subscription to the stream. This will be passed as parameter to the transform worker
        #query2 = StreamQuery(stream_ids=[viz_stream_id,])
        query2 = StreamQuery(viz_stream_id)
        viz_subscription_id2 = self.pubsub_cli.create_subscription(
            query=query2,
            exchange_name='viz_data_exchange.' + self.random_id_generator())

        # maybe this is a good place to pass the couch DB table to use and other parameters
        configuration2 = {
            "stream_def_id": viz_stream_def_id,
            "data_product_id": data_product_id,
            "realtime_flag": "True"
        }

        # Launch the viz transform process
        viz_transform_id2 = self.tms_cli.create_transform(
            name='viz_transform_realtime_google_dt_' +
            self.random_id_generator() + '.' + data_product_id,
            in_subscription_id=viz_subscription_id2,
            out_streams={
                "visualization_service_submit_stream_id":
                self.viz_service_submit_stream_id
            },
            process_definition_id=self.google_dt_proc_def_id,
            configuration=configuration2)
        self.tms_cli.activate_transform(viz_transform_id2)

        # keep a record of the viz_transform_id
        self.viz_data_dictionary['google_realtime_dt'][data_product_id][
            'transform_proc'] = viz_transform_id2
    def start_google_dt_transform(self, data_product_id='', query=''):
        """Request to fetch the datatable for a data product as specified in the query. The query also specifies whether it is a realtime view or a one-shot request.

        @param data_product_id    str
        @param query    str
        @retval datatable    str
        @throws NotFound    object with specified id, query does not exist
        """

        # generate a token unique for this request
        data_product_id_token = data_product_id + "." + self.random_id_generator()

        # Get object associated with data_product_id
        dp_obj = self.rr_cli.read(data_product_id)

        if dp_obj.dataset_id == '':
            return None

        # define replay. If no filters are passed the entire ingested dataset is returned
        replay_id, replay_stream_id = self.dr_cli.define_replay(
            dataset_id=dp_obj.dataset_id)

        replay_stream_def_id = self.pubsub_cli.find_stream_definition(
            stream_id=replay_stream_id, id_only=True)

        # setup the transform to handle the data coming back from the replay
        # Init storage for the resulting data_table
        self.viz_data_dictionary['google_dt'][data_product_id_token] = {
            'data_table': [],
            'ready_flag': False
        }

        # Create the subscription to the stream. This will be passed as parameter to the transform worker
        query = StreamQuery(stream_ids=[
            replay_stream_id,
        ])
        replay_subscription_id = self.pubsub_cli.create_subscription(
            query=query,
            exchange_name='viz_data_exchange.' + self.random_id_generator())

        # maybe this is a good place to pass the couch DB table to use and other parameters
        configuration = {
            "stream_def_id": replay_stream_def_id,
            "data_product_id": data_product_id,
            "realtime_flag": "False",
            "data_product_id_token": data_product_id_token
        }

        # Launch the viz transform process
        viz_transform_id = self.tms_cli.create_transform(
            name='viz_transform_google_dt_' + self.random_id_generator() +
            '.' + data_product_id,
            in_subscription_id=replay_subscription_id,
            out_streams={
                "visualization_service_submit_stream_id":
                self.viz_service_submit_stream_id
            },
            process_definition_id=self.google_dt_proc_def_id,
            configuration=configuration)
        self.tms_cli.activate_transform(viz_transform_id)

        # keep a record of the viz_transform_id
        self.viz_data_dictionary['google_dt'][data_product_id_token][
            'transform_proc'] = viz_transform_id

        # Start the replay and return the token
        self.dr_cli.start_replay(replay_id=replay_id)

        return "google_dt_transform_cb(\"" + data_product_id_token + "\")"
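
The method returns a JSONP-style callback string carrying the per-request token. A hedged sketch of how a caller with access to the service instance might pull the token back out and poll for the finished data table (the polling loop and the viz_service reference are illustrative, not part of the service API):

    cb = viz_service.start_google_dt_transform(data_product_id=dp_id, query='')
    # cb looks like: google_dt_transform_cb("<data_product_id>.<random_id>")
    token = cb[len('google_dt_transform_cb("'):-len('")')]

    # Poll until the replay/transform pipeline has filled in the table
    while not viz_service.viz_data_dictionary['google_dt'][token]['ready_flag']:
        gevent.sleep(1)
    data_table = viz_service.viz_data_dictionary['google_dt'][token]['data_table']
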
Example #11
    def test_dm_integration(self):
        '''
        test_dm_integration
        Test full DM Services Integration using a salinity transform
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there were two stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=ctd_stream_def, name='Simulated CTD data')

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = pubsub_management_service.create_stream_definition(
            container=SalinityTransform.outgoing_stream_def,
            name='Scalar Salinity data stream')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.ctd_stream_publisher',
            'class': 'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        # one for the salinity transform
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'class': 'SalinityTransform'
        }

        salinity_transform_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=ctd_stream_def_id)

        # Set up the datasets
        ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        #---------------------------
        # Set up the salinity transform
        #---------------------------

        # Create the stream
        sal_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=sal_stream_def_id)

        # Set up the datasets
        sal_dataset_id = dataset_management_service.create_dataset(
            stream_id=sal_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of the salinity as a dataset
        sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=sal_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Create a subscription as input to the transform
        sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery(stream_ids=[
                ctd_stream_id,
            ]),
            exchange_name='salinity_transform_input'
        )  # how do we make these names??? i.e. Should they be anonymous?

        # create the salinity transform
        sal_transform_id = transform_management_service.create_transform(
            name='example salinity transform',
            in_subscription_id=sal_transform_input_subscription_id,
            out_streams={
                'output': sal_stream_id,
            },
            process_definition_id=salinity_transform_procdef_id,
            # no configuration needed at this time...
        )
        # start the transform - for a test case it makes sense to do it before starting the producer but it is not required
        transform_management_service.activate_transform(
            transform_id=sal_transform_id)

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': ctd_stream_id,
            }
        }
        ctd_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        salinity_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                sal_stream_id,
            ]),
            exchange_name='salinity_test',
            name="test salinity subscription",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            log.warn('Salinity data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=salinity_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow and parse the messages...
        process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        for message in results:

            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')

            import numpy

            assertions(isinstance(salinity, numpy.ndarray))

            assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0
    def test_usgs_integration(self):
        '''
        test_usgs_integration
        Test full DM Services Integration using usgs
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here
        #-----------------------------
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        process_list = []
        datasets = []

        datastore_name = 'test_usgs_integration'


        #---------------------------
        # Set up ingestion
        #---------------------------
        # Configure ingestion using eight workers, ingesting to the test_usgs_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=8
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        usgs_stream_def = USGS_stream_definition()

        stream_def_id = pubsub_management_service.create_stream_definition(container=usgs_stream_def, name='Junk definition')


        #---------------------------
        # Set up the producers (USGS stream publishers)
        #---------------------------
        # Launch two simulated USGS producers
        for iteration in xrange(2):
            # Make a stream to output on

            stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

            #---------------------------
            # Set up the datasets
            #---------------------------
            dataset_id = dataset_management_service.create_dataset(
                stream_id=stream_id,
                datastore_name=datastore_name,
                view_name='datasets/stream_join_granule'
            )
            # Keep track of the datasets
            datasets.append(dataset_id)

            stream_policy_id = ingestion_management_service.create_dataset_configuration(
                dataset_id = dataset_id,
                archive_data = True,
                archive_metadata = True,
                ingestion_configuration_id = ingestion_configuration_id
            )


            producer_definition = ProcessDefinition()
            producer_definition.executable = {
                'module':'eoi.agent.handler.usgs_stream_publisher',
                'class':'UsgsPublisher'
            }
            configuration = {
                'process':{
                    'stream_id':stream_id,
                    }
            }
            procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
            log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
            pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)


            # Keep track, we'll kill 'em later.
            process_list.append(pid)
        # Get about 4 seconds of data
        time.sleep(4)

        #---------------------------
        # Stop producing data
        #---------------------------

        for process in process_list:
            process_dispatcher.cancel_process(process)

        #----------------------------------------------
        # The replay and the transform, a love story.
        #----------------------------------------------
        # Happy Valentines to the clever coder who catches the above!

        transform_definition = ProcessDefinition()
        transform_definition.executable = {
            'module':'ion.processes.data.transforms.transform_example',
            'class':'TransformCapture'
        }
        transform_definition_id = process_dispatcher.create_process_definition(process_definition=transform_definition)

        dataset_id = datasets.pop() # Just need one for now
        replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

        #--------------------------------------------
        # I'm Selling magazine subscriptions here!
        #--------------------------------------------

        subscription = pubsub_management_service.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
            exchange_name='transform_capture_point')

        #--------------------------------------------
        # Start the transform (capture)
        #--------------------------------------------
        transform_id = transform_management_service.create_transform(
            name='capture_transform',
            in_subscription_id=subscription,
            process_definition_id=transform_definition_id
        )

        transform_management_service.activate_transform(transform_id=transform_id)

        #--------------------------------------------
        # BEGIN REPLAY!
        #--------------------------------------------

        data_retriever_service.start_replay(replay_id=replay_id)

        #--------------------------------------------
        # Lets get some boundaries
        #--------------------------------------------

        bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
    def setUp(self):
        """
        Set up the test environment to exercise use of the instrument agent, including:
        * define driver_config parameters.
        * create container with required services and container client.
        * create publication stream ids for each driver data stream.
        * create stream_config parameters.
        * create and activate subscriptions for agent data streams.
        * spawn instrument agent process and create agent client.
        * add cleanup functions to cause subscribers to get stopped.
        """

        #       params = { ('CTD', 'TA2'): -1.9434316e-05,
        #       ('CTD', 'PTCA1'): 1.3206866,
        #       ('CTD', 'TCALDATE'): [8, 11, 2006] }

        #       for tup in params:
        #           print tup

        self.addCleanup(self.customCleanUp)
        # Names of agent data streams to be configured.
        parsed_stream_name = 'ctd_parsed'
        raw_stream_name = 'ctd_raw'

        # Driver configuration.
        #Simulator

        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method': 'ethernet',
                    'device_addr': CFG.device.sbe37.host,
                    'device_port': CFG.device.sbe37.port,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }

        #Hardware
        '''
        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method':'ethernet',
                    'device_addr': '137.110.112.119',
                    'device_port': 4001,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }
        '''

        # Start container.
        self._start_container()

        # Establish endpoint with container (used in tests below)
        self._container_client = ContainerAgentClient(node=self.container.node,
                                                      name=self.container.name)

        # Bring up services in a deploy file (no need to message)
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        # Create a pubsub client to create streams.
        self._pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume(message, headers):
            log.info('Subscriber received message: %s', str(message))

        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(
            process=self.container, node=self.container.node)

        self.subs = []

        # Create streams for each stream named in driver.
        self.stream_config = {}
        for (stream_name, val) in PACKET_CONFIG.iteritems():
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = self._pubsub_client.create_stream_definition(
                container=stream_def)
            stream_id = self._pubsub_client.create_stream(
                name=stream_name,
                stream_definition_id=stream_def_id,
                original=True,
                encoding='ION R2')
            self.stream_config[stream_name] = stream_id

            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(
                exchange_name=exchange_name, callback=consume)
            sub.start()
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = self._pubsub_client.create_subscription(
                query=query, exchange_name=exchange_name)
            self._pubsub_client.activate_subscription(sub_id)
            self.subs.append(sub)

        # Add cleanup function to stop subscribers.
        def stop_subscriber(sub_list):
            for sub in sub_list:
                sub.stop()

        self.addCleanup(stop_subscriber, self.subs)

        # Create agent config.

        self.agent_resource_id = '123xyz'

        self.agent_config = {
            'driver_config': self.driver_config,
            'stream_config': self.stream_config,
            'agent': {
                'resource_id': self.agent_resource_id
            }
        }

        # Launch an instrument agent process.
        self._ia_name = 'agent007'
        self._ia_mod = 'ion.agents.instrument.instrument_agent'
        self._ia_class = 'InstrumentAgent'
        self._ia_pid = self._container_client.spawn_process(
            name=self._ia_name,
            module=self._ia_mod,
            cls=self._ia_class,
            config=self.agent_config)

        log.info('got pid=%s', str(self._ia_pid))

        self._ia_client = None
        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(self.agent_resource_id,
                                              process=FakeProcess())
        log.info('got ia client %s', str(self._ia_client))
    def run_even_odd_transform(self):
        '''
        This example script runs a chained three way transform:
            B
        A <
            C
        Where A is the even_odd transform (generates a stream of even and odd numbers from input)
        and B and C are the basic transforms that receive even and odd input
        '''
        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        tms_cli = TransformManagementServiceClient(node=self.container.node)
        procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        #-------------------------------
        # Process Definition
        #-------------------------------
        # Create the process definition for the basic transform
        process_definition = IonObject(RT.ProcessDefinition,
                                       name='basic_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class': 'TransformExample'
        }
        basic_transform_definition_id = procd_cli.create_process_definition(
            process_definition=process_definition)

        # Create the process definition for the TransformEvenOdd
        process_definition = IonObject(RT.ProcessDefinition,
                                       name='evenodd_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class': 'TransformEvenOdd'
        }
        evenodd_transform_definition_id = procd_cli.create_process_definition(
            process_definition=process_definition)

        #-------------------------------
        # Streams
        #-------------------------------
        input_stream_id = pubsub_cli.create_stream(name='input_stream',
                                                   original=True)

        even_stream_id = pubsub_cli.create_stream(name='even_stream',
                                                  original=True)

        odd_stream_id = pubsub_cli.create_stream(name='odd_stream',
                                                 original=True)

        #-------------------------------
        # Subscriptions
        #-------------------------------

        query = StreamQuery(stream_ids=[input_stream_id])
        input_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='input_queue')

        query = StreamQuery(stream_ids=[even_stream_id])
        even_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='even_queue')

        query = StreamQuery(stream_ids=[odd_stream_id])
        odd_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='odd_queue')

        #-------------------------------
        # Launch the EvenOdd Transform
        #-------------------------------

        evenodd_id = tms_cli.create_transform(
            name='even_odd',
            in_subscription_id=input_subscription_id,
            out_streams={
                'even': even_stream_id,
                'odd': odd_stream_id
            },
            process_definition_id=evenodd_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(evenodd_id)

        #-------------------------------
        # Launch the Even Processing Transform
        #-------------------------------

        even_transform_id = tms_cli.create_transform(
            name='even_transform',
            in_subscription_id=even_subscription_id,
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(even_transform_id)

        #-------------------------------
        # Launch the Odd Processing Transform
        #-------------------------------

        odd_transform_id = tms_cli.create_transform(
            name='odd_transform',
            in_subscription_id=odd_subscription_id,
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(odd_transform_id)

        #-------------------------------
        # Spawn the Streaming Producer
        #-------------------------------

        id_p = self.container.spawn_process(
            'myproducer', 'ion.processes.data.transforms.transform_example',
            'TransformExampleProducer', {
                'process': {
                    'type': 'stream_process',
                    'publish_streams': {
                        'out_stream': input_stream_id
                    }
                },
                'stream_producer': {
                    'interval': 4000
                }
            })
        self.container.proc_manager.procs[id_p].start()
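
To verify the chain end to end, a listener can be attached to one of the output streams the same way the other examples do; a minimal sketch for the even branch, reusing the pubsub client, stream id and registrar patterns already created in this method:

    # Subscribe to the even output stream and log whatever the transform emits
    verify_sub_id = pubsub_cli.create_subscription(
        query=StreamQuery(stream_ids=[even_stream_id]),
        exchange_name='even_verify_queue')

    registrar = StreamSubscriberRegistrar(process=self.container,
                                          node=self.container.node)
    verifier = registrar.create_subscriber(
        exchange_name='even_verify_queue',
        callback=lambda message, headers: log.info('even output: %s', message))
    verifier.start()

    pubsub_cli.activate_subscription(verify_sub_id)
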
Example #15
    def test_update_stream_subscription(self):

        q = gevent.queue.Queue()

        def message_received(message, headers):
            q.put(message)

        subscriber = self.stream_subscriber.create_subscriber(exchange_name='a_queue', callback=message_received)
        subscriber.start()

        self.pubsub_cli.activate_subscription(self.ctd_subscription_id)

        # Both publishers are received by the subscriber
        self.ctd_stream1_publisher.publish('message1')
        self.assertEqual(q.get(timeout=5), 'message1')
        self.assertTrue(q.empty())

        self.ctd_stream2_publisher.publish('message2')
        self.assertEqual(q.get(timeout=5), 'message2')
        self.assertTrue(q.empty())


        # Update the subscription by removing a stream...
        subscription = self.pubsub_cli.read_subscription(self.ctd_subscription_id)
        stream_ids = list(subscription.query.stream_ids)
        stream_ids.remove(self.ctd_stream2_id)
        self.pubsub_cli.update_subscription(
            subscription_id=subscription._id,
            query=StreamQuery(stream_ids=stream_ids)
        )


        # Stream 2 is no longer received
        self.ctd_stream2_publisher.publish('message2')
        p = None
        with self.assertRaises(gevent.queue.Empty) as cm:
            p = q.get(timeout=1)

        ex = cm.exception
        self.assertEqual(str(ex), '')
        self.assertEqual(p, None)

        # Stream 1 is as before
        self.ctd_stream1_publisher.publish('message1')
        self.assertEqual(q.get(timeout=5), 'message1')
        self.assertTrue(q.empty())


        # Now switch the active streams...

        # Update the subscription to listen to stream 2 only...
        self.pubsub_cli.update_subscription(
            subscription_id=self.ctd_subscription_id,
            query=StreamQuery([self.ctd_stream2_id])
        )


        # Stream 1 is no longer received
        self.ctd_stream1_publisher.publish('message1')
        p = None
        with self.assertRaises(gevent.queue.Empty) as cm:
            p = q.get(timeout=1)

        ex = cm.exception
        self.assertEqual(str(ex), '')
        self.assertEqual(p, None)

        # Stream 2 is received
        self.ctd_stream2_publisher.publish('message2')
        self.assertEqual(q.get(timeout=5), 'message2')
        self.assertTrue(q.empty())




        subscriber.stop()
    def create_data_process(self,
                            data_process_definition_id=None,
                            in_data_product_ids='',
                            out_data_products=None,
                            configuration=None):
        """
        @param  data_process_definition_id: Object with definition of the
                    transform to apply to the input data product
        @param  in_data_product_ids: IDs of the input data products
        @param  out_data_products: dict mapping output names to the IDs of the output data products
        @retval data_process_id: ID of the newly created data process object
        """

        inform = "Input Data Product:       " + str(in_data_product_ids) + \
                 "\nTransformed by:           " + str(data_process_definition_id) + \
                 "\nTo create output Product: " + str(out_data_products)
        log.debug("DataProcessManagementService:create_data_process()\n" + inform)

        if configuration is None:
            configuration = {}

        # Create and store a new DataProcess with the resource registry
        log.debug(
            "DataProcessManagementService:create_data_process - Create and store a new DataProcess with the resource registry"
        )
        data_process_def_obj = self.read_data_process_definition(
            data_process_definition_id)

        data_process_name = "process_" + data_process_def_obj.name \
                             + time.ctime()
        self.data_process = IonObject(RT.DataProcess, name=data_process_name)
        data_process_id, version = self.clients.resource_registry.create(
            self.data_process)
        log.debug(
            "DataProcessManagementService:create_data_process - Create and store a new DataProcess with the resource registry  data_process_id: %s",
            str(data_process_id))

        # Register the data process instance as a data producer with DataAcquisitionMgmtSvc
        #TODO: should this be outside this method? Called by orchestration?
        data_producer_id = self.clients.data_acquisition_management.register_process(
            data_process_id)
        log.debug(
            "DataProcessManagementService:create_data_process register process with DataAcquisitionMgmtSvc: data_producer_id: %s   (L4-CI-SA-RQ-181)",
            str(data_producer_id))

        self.output_stream_dict = {}
        #TODO: should this be outside this method? Called by orchestration?
        if out_data_products is None:
            raise BadRequest(
                "Data Process must have output product(s) specified %s",
                str(data_process_definition_id))
        for name, out_data_product_id in out_data_products.iteritems():

            # check that the product is not already associated with a producer
            producer_ids, _ = self.clients.resource_registry.find_objects(
                out_data_product_id, PRED.hasDataProducer, RT.DataProducer,
                True)
            if producer_ids:
                raise BadRequest(
                    "Data Product should not already be associated to a DataProducer %s hasDataProducer %s",
                    str(data_process_id), str(producer_ids[0]))

            #Assign each output Data Product to this producer resource
            out_data_product_obj = self.clients.resource_registry.read(
                out_data_product_id)
            if not out_data_product_obj:
                raise NotFound("Output Data Product %s does not exist" %
                               out_data_product_id)
            # Associate with DataProcess: register as an output product for this process
            log.debug(
                "DataProcessManagementService:create_data_process link data process %s and output data product: %s    (L4-CI-SA-RQ-260)",
                str(data_process_id), str(out_data_product_id))
            self.clients.data_acquisition_management.assign_data_product(
                data_process_id, out_data_product_id, create_stream=False)

            # Retrieve the id of the OUTPUT stream from the out Data Product
            stream_ids, _ = self.clients.resource_registry.find_objects(
                out_data_product_id, PRED.hasStream, RT.Stream, True)

            log.debug(
                "DataProcessManagementService:create_data_process retrieve out data prod streams: %s",
                str(stream_ids))
            if not stream_ids:
                raise NotFound("No Stream created for output Data Product " +
                               str(out_data_product_id))
            if len(stream_ids) != 1:
                raise BadRequest(
                    "Data Product should only have ONE stream at this time" +
                    str(out_data_product_id))
            self.output_stream_dict[name] = stream_ids[0]
            log.debug(
                "DataProcessManagementService:create_data_process -Register the data process instance as a data producer with DataAcquisitionMgmtSvc, then retrieve the id of the OUTPUT stream  out_stream_id: "
                + str(self.output_stream_dict[name]))

        # Associate with dataProcess
        self.clients.resource_registry.create_association(
            data_process_id, PRED.hasProcessDefinition,
            data_process_definition_id)

        #check if data process has lookup tables attached
        self._find_lookup_tables(data_process_definition_id, configuration)

        #TODO: currently this is handled explicitly after creating the data product; that code then calls DMAS:assign_data_product
        log.debug(
            "DataProcessManagementService:create_data_process associate data process workflows with source data products %s hasInputProducts  %s   (L4-CI-SA-RQ-260)",
            str(data_process_id), str(in_data_product_ids))
        for in_data_product_id in in_data_product_ids:
            self.clients.resource_registry.create_association(
                data_process_id, PRED.hasInputProduct, in_data_product_id)

            #check if in data product is attached to an instrument, check instrumentDevice and InstrumentModel for lookup table attachments
            instdevice_ids, _ = self.clients.resource_registry.find_subjects(
                RT.InstrumentDevice, PRED.hasOutputProduct, in_data_product_id,
                True)
            for instdevice_id in instdevice_ids:
                log.debug(
                    "DataProcessManagementService:create_data_process instrument device_id assoc to the input data product of this data process: %s   (L4-CI-SA-RQ-231)",
                    str(instdevice_id))
                self._find_lookup_tables(instdevice_id, configuration)
                instmodel_ids, _ = self.clients.resource_registry.find_objects(
                    instdevice_id, PRED.hasModel, RT.InstrumentModel, True)
                for instmodel_id in instmodel_ids:
                    log.debug(
                        "DataProcessManagementService:create_data_process instmodel_id assoc to the instDevice: %s",
                        str(instmodel_id))
                    self._find_lookup_tables(instmodel_id, configuration)

        #-------------------------------
        # Create subscription from in_data_product, which should already be associated with a stream via the Data Producer
        #-------------------------------

#        # first - get the data producer associated with this IN data product
#        log.debug("DataProcessManagementService:create_data_process - get the data producer associated with this IN data product")
#        producer_ids, _ = self.clients.resource_registry.find_objects(in_data_product_id, PRED.hasDataProducer, RT.DataProducer, True)
#        if not producer_ids:
#            raise NotFound("No Data Producer created for this Data Product " + str(in_data_product_id))
#        if len(producer_ids) != 1:
#            raise BadRequest("Data Product should only have ONE Data Producers at this time" + str(in_data_product_id))
#        in_product_producer = producer_ids[0]
#        log.debug("DataProcessManagementService:create_data_process - get the data producer associated with this IN data product  in_product_producer: " +  str(in_product_producer))

        # second - get the streams associated with each IN data product
        self.in_stream_ids = []
        for in_data_product_id in in_data_product_ids:
            log.debug(
                "DataProcessManagementService:create_data_process - get the stream associated with this IN data product"
            )
            stream_ids, _ = self.clients.resource_registry.find_objects(
                in_data_product_id, PRED.hasStream, RT.Stream, True)
            if not stream_ids:
                raise NotFound("No Stream created for this IN Data Product " +
                               str(in_data_product_id))
            if len(stream_ids) != 1:
                raise BadRequest(
                    "IN Data Product should only have ONE stream at this time"
                    + str(in_data_product_id))
            log.debug(
                "DataProcessManagementService:create_data_process - get the stream associated with this IN data product:  %s  in_stream_id: %s ",
                str(in_data_product_id), str(stream_ids[0]))
            self.in_stream_ids.append(stream_ids[0])

        # Finally - create a subscription to the input stream
        log.debug(
            "DataProcessManagementService:create_data_process - Finally - create a subscription to the input stream"
        )
        in_data_product_obj = self.clients.data_product_management.read_data_product(
            in_data_product_id)
        query = StreamQuery(stream_ids=self.in_stream_ids)
        #self.input_subscription_id = self.clients.pubsub_management.create_subscription(query=query, exchange_name=in_data_product_obj.name)
        self.input_subscription_id = self.clients.pubsub_management.create_subscription(
            query=query, exchange_name=data_process_name)
        log.debug(
            "DataProcessManagementService:create_data_process - Finally - create a subscription to the input stream   input_subscription_id: %s",
            str(self.input_subscription_id))

        # add the subscription id to the resource for clean up later
        data_process_obj = self.clients.resource_registry.read(data_process_id)
        data_process_obj.input_subscription_id = self.input_subscription_id
        self.clients.resource_registry.update(data_process_obj)

        procdef_ids, _ = self.clients.resource_registry.find_objects(
            data_process_definition_id,
            PRED.hasProcessDefinition,
            RT.ProcessDefinition,
            id_only=True)
        if not procdef_ids:
            raise BadRequest(
                "Cannot find associated ProcessDefinition for DataProcessDefinition id=%s"
                % data_process_definition_id)
        process_definition_id = procdef_ids[0]

        # Launch the transform process
        log.debug(
            "DataProcessManagementService:create_data_process - Launch the first transform process: "
        )
        log.debug(
            "DataProcessManagementService:create_data_process - input_subscription_id: "
            + str(self.input_subscription_id))
        log.debug(
            "DataProcessManagementService:create_data_process - out_stream_id: "
            + str(self.output_stream_dict))
        log.debug(
            "DataProcessManagementService:create_data_process - process_definition_id: "
            + str(process_definition_id))
        log.debug(
            "DataProcessManagementService:create_data_process - data_process_id: "
            + str(data_process_id))

        transform_id = self.clients.transform_management.create_transform(
            name=data_process_id,
            description=data_process_id,
            in_subscription_id=self.input_subscription_id,
            out_streams=self.output_stream_dict,
            process_definition_id=process_definition_id,
            configuration=configuration)

        log.debug(
            "DataProcessManagementService:create_data_process - transform_id: "
            + str(transform_id))

        self.clients.resource_registry.create_association(
            data_process_id, PRED.hasTransform, transform_id)
        log.debug(
            "DataProcessManagementService:create_data_process - Launch the first transform process   transform_id: %s",
            str(transform_id))

        # TODO: Flesh details of transform mgmt svc schedule method
        #        self.clients.transform_management_service.schedule_transform(transform_id)

        return data_process_id
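
A minimal usage sketch for the create_data_process operation above; the container/node handle, the ids, and the output-name key are hypothetical placeholders, and the client class name is assumed from the naming convention of the other service clients in these examples.

    # Hedged usage sketch - not part of the original example; all ids are hypothetical
    dpms_cli = DataProcessManagementServiceClient(node=container.node)
    data_process_id = dpms_cli.create_data_process(
        data_process_definition_id='ctd_salinity_dpd_id',            # hypothetical definition id
        in_data_product_ids=['ctd_parsed_data_product_id'],          # hypothetical input product ids
        out_data_products={'output': 'salinity_data_product_id'},    # output name -> hypothetical product id
        configuration={})                                             # optional; defaults to {}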
Exemple #17
0
    def setUp(self):
        mock_clients = self._create_service_mock('pubsub_management')
        self.pubsub_service = PubsubManagementService()
        self.pubsub_service.clients = mock_clients
        self.pubsub_service.container = DotDict()
        self.pubsub_service.container.node = Mock()

        # save some typing
        self.mock_create = mock_clients.resource_registry.create
        self.mock_update = mock_clients.resource_registry.update
        self.mock_delete = mock_clients.resource_registry.delete
        self.mock_read = mock_clients.resource_registry.read
        self.mock_create_association = mock_clients.resource_registry.create_association
        self.mock_delete_association = mock_clients.resource_registry.delete_association
        self.mock_find_resources = mock_clients.resource_registry.find_resources
        self.mock_find_associations = mock_clients.resource_registry.find_associations
        self.mock_find_objects = mock_clients.resource_registry.find_objects

        #StreamDefinition
        self.stream_definition_id = "stream_definition_id"
        self.stream_definition = Mock()
        self.stream_definition.name = "SampleStreamDefinition"
        self.stream_definition.description = "Sample StreamDefinition In PubSub"
        self.stream_definition.container = StreamDefinitionContainer()

        # Stream
        self.stream_id = "stream_id"
        self.stream = Mock()
        self.stream.name = "SampleStream"
        self.stream.description = "Sample Stream In PubSub"
        self.stream.encoding = ""
        self.stream.original = True
        self.stream.stream_definition_id = self.stream_definition_id
        self.stream.url = ""
        self.stream.producers = ['producer1', 'producer2', 'producer3']

        #Subscription
        self.subscription_id = "subscription_id"
        self.subscription_stream_query = Mock()
        self.subscription_stream_query.name = "SampleSubscriptionStreamQuery"
        self.subscription_stream_query.description = "Sample Subscription With StreamQuery"
        self.subscription_stream_query.query = StreamQuery([self.stream_id])
        self.subscription_stream_query.exchange_name = "ExchangeName"
        self.subscription_stream_query.subscription_type = SubscriptionTypeEnum.STREAM_QUERY
        self.subscription_stream_query.is_active = False

        self.subscription_exchange_query = Mock()
        self.subscription_exchange_query.name = "SampleSubscriptionExchangeQuery"
        self.subscription_exchange_query.description = "Sample Subscription With Exchange Query"
        self.subscription_exchange_query.query = ExchangeQuery()
        self.subscription_exchange_query.exchange_name = "ExchangeName"
        self.subscription_exchange_query.subscription_type = SubscriptionTypeEnum.EXCHANGE_QUERY
        self.subscription_exchange_query.is_active = False

        #Subscription Has Stream Association
        self.association_id = "association_id"
        self.subscription_to_stream_association = Mock()
        self.subscription_to_stream_association._id = self.association_id

        self.stream_route = Mock()
        self.stream_route.routing_key = self.stream_id + '.data'
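
    # Hedged sketch of a unit test built on the mocks prepared in setUp above (not part
    # of the original example). The create_stream call signature and its return value
    # are assumed from the other examples in this document; the assertions are illustrative.
    def test_create_stream_sketch(self):
        self.mock_create.return_value = ('stream_id', 'rev_1')

        stream_id = self.pubsub_service.create_stream(name='SampleStream',
                                                      description='Sample Stream In PubSub')

        self.assertEqual(stream_id, 'stream_id')
        self.assertTrue(self.mock_create.call_count == 1, 'create was not called')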
    def run_basic_transform(self):
        ''' Runs a basic example of a transform. It chains two transforms together, each of which adds 1 to its input.

        Producer -> A -> B
        Producer generates a number every four seconds and publishes it on the 'ctd_output_stream';
          the producer is acting as a CTD or instrument in this example.
        A is a basic transform that increments its input and publishes it on the 'transform_output' stream.
        B is a basic transform that receives A's output.
        All transforms write logging data to 'FS.TEMP/transform_output' so you can see the activity of the transforms.
        '''

        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        tms_cli = TransformManagementServiceClient(node=self.container.node)
        procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        #-------------------------------
        # Process Definition
        #-------------------------------

        process_definition = IonObject(RT.ProcessDefinition,
                                       name='transform_process_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class': 'TransformExample'
        }
        process_definition_id = procd_cli.create_process_definition(
            process_definition)

        #-------------------------------
        # First Transform
        #-------------------------------

        # Create a dummy output stream from a 'ctd' instrument
        ctd_output_stream_id = pubsub_cli.create_stream(
            name='ctd_output_stream', original=True)

        # Create the subscription to the ctd_output_stream
        query = StreamQuery(stream_ids=[ctd_output_stream_id])
        ctd_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='ctd_output')

        # Create an output stream for the transform
        transform_output_stream_id = pubsub_cli.create_stream(
            name='transform_output', original=True)

        configuration = {}

        # Launch the first transform process
        transform_id = tms_cli.create_transform(
            name='basic_transform',
            in_subscription_id=ctd_subscription_id,
            out_streams={'output': transform_output_stream_id},
            process_definition_id=process_definition_id,
            configuration=configuration)
        tms_cli.activate_transform(transform_id)

        #-------------------------------
        # Second Transform
        #-------------------------------

        # Create a SUBSCRIPTION to this output stream for the second transform
        query = StreamQuery(stream_ids=[transform_output_stream_id])
        second_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='final_output')

        # Create a final output stream
        final_output_id = pubsub_cli.create_stream(name='final_output',
                                                   original=True)

        configuration = {}

        second_transform_id = tms_cli.create_transform(
            name='second_transform',
            in_subscription_id=second_subscription_id,
            out_streams={'output': final_output_id},
            process_definition_id=process_definition_id,
            configuration=configuration)
        tms_cli.activate_transform(second_transform_id)

        #-------------------------------
        # Producer (Sample Input)
        #-------------------------------

        # Create a producing example process
        id_p = self.container.spawn_process(
            'myproducer', 'ion.processes.data.transforms.transform_example',
            'TransformExampleProducer', {
                'process': {
                    'type': 'stream_process',
                    'publish_streams': {
                        'out_stream': ctd_output_stream_id
                    }
                },
                'stream_producer': {
                    'interval': 4000
                }
            })
        self.container.proc_manager.procs[id_p].start()
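
        # Hedged follow-up sketch (not in the original example): watch the 'final_output'
        # stream to observe the chained result, following the subscriber pattern used in
        # the other examples in this document. The exchange name and the callback are
        # illustrative; StreamSubscriberRegistrar and log are assumed to be imported here
        # as they are elsewhere.
        final_query = StreamQuery(stream_ids=[final_output_id])
        final_subscription_id = pubsub_cli.create_subscription(
            query=final_query, exchange_name='final_output_watch')

        def final_message_received(message, headers):
            log.info('final output received: %s', str(message))

        watcher_registrar = StreamSubscriberRegistrar(process=self.container,
                                                      node=self.container.node)
        final_subscriber = watcher_registrar.create_subscriber(
            exchange_name='final_output_watch', callback=final_message_received)
        final_subscriber.start()

        # activate only after the subscriber (and its queue) exists
        pubsub_cli.activate_subscription(final_subscription_id)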
    def test_integrated_transform(self):
        '''
        This example script runs a chained three-way transform:
            B
        A <
            C
        Where A is the even_odd transform (it generates even and odd number streams from its input)
        and B and C are basic transforms that receive the even and odd streams respectively.
        '''
        cc = self.container
        assertions = self.assertTrue

        pubsub_cli = PubsubManagementServiceClient(node=cc.node)
        rr_cli = ResourceRegistryServiceClient(node=cc.node)
        tms_cli = TransformManagementServiceClient(node=cc.node)
        #-------------------------------
        # Process Definition
        #-------------------------------
        # Create the process definition for the basic transform
        process_definition = IonObject(RT.ProcessDefinition, name='basic_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class':'TransformExample'
        }
        basic_transform_definition_id, _ = rr_cli.create(process_definition)

        # Create The process definition for the TransformEvenOdd
        process_definition = IonObject(RT.ProcessDefinition, name='evenodd_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class':'TransformEvenOdd'
        }
        evenodd_transform_definition_id, _ = rr_cli.create(process_definition)

        #-------------------------------
        # Streams
        #-------------------------------
        streams = [pubsub_cli.create_stream() for i in xrange(5)]

        #-------------------------------
        # Subscriptions
        #-------------------------------

        query = StreamQuery(stream_ids=[streams[0]])
        input_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_queue')

        query = StreamQuery(stream_ids = [streams[1]]) # even output
        even_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='even_queue')

        query = StreamQuery(stream_ids = [streams[2]]) # odd output
        odd_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='odd_queue')


        #-------------------------------
        # Launch the EvenOdd Transform
        #-------------------------------

        evenodd_id = tms_cli.create_transform(name='even_odd',
            in_subscription_id=input_subscription_id,
            out_streams={'even':streams[1], 'odd':streams[2]},
            process_definition_id=evenodd_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(evenodd_id)


        #-------------------------------
        # Launch the Even Processing Transform
        #-------------------------------

        even_transform_id = tms_cli.create_transform(name='even_transform',
            in_subscription_id = even_subscription_id,
            out_streams={'even_plus1':streams[3]},
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(even_transform_id)

        #-------------------------------
        # Launch the Odd Processing Transform
        #-------------------------------

        odd_transform_id = tms_cli.create_transform(name='odd_transform',
            in_subscription_id = odd_subscription_id,
            out_streams={'odd_plus1':streams[4]},
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(odd_transform_id)

        #-------------------------------
        # Set up final subscribers
        #-------------------------------

        evenplus1_subscription_id = pubsub_cli.create_subscription(
            query=StreamQuery([streams[3]]),
            exchange_name='evenplus1_queue',
            name='EvenPlus1Subscription',
            description='EvenPlus1 SubscriptionDescription'
        )
        oddplus1_subscription_id = pubsub_cli.create_subscription(
            query=StreamQuery([streams[4]]),
            exchange_name='oddplus1_queue',
            name='OddPlus1Subscription',
            description='OddPlus1 SubscriptionDescription'
        )

        total_msg_count = 2

        msgs = gevent.queue.Queue()


        def even1_message_received(message, headers):
            input = int(message.get('num'))
            assertions( (input % 2) ) # Assert it is odd (transform adds 1)
            msgs.put(True)


        def odd1_message_received(message, headers):
            input = int(message.get('num'))
            assertions(not (input % 2)) # Assert it is even
            msgs.put(True)

        subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)
        even_subscriber = subscriber_registrar.create_subscriber(exchange_name='evenplus1_queue', callback=even1_message_received)
        odd_subscriber = subscriber_registrar.create_subscriber(exchange_name='oddplus1_queue', callback=odd1_message_received)

        # Start subscribers
        even_subscriber.start()
        odd_subscriber.start()

        # Activate subscriptions
        pubsub_cli.activate_subscription(evenplus1_subscription_id)
        pubsub_cli.activate_subscription(oddplus1_subscription_id)

        #-------------------------------
        # Set up fake stream producer
        #-------------------------------

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        # Normally the user does not see or create the publisher, this is part of the containers business.
        # For the test we need to set it up explicitly
        publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node)
        stream_publisher = publisher_registrar.create_publisher(stream_id=streams[0])

        #-------------------------------
        # Start test
        #-------------------------------

        # Publish a stream
        for i in xrange(total_msg_count):
            stream_publisher.publish({'num':str(i)})

        time.sleep(0.5)

        for i in xrange(total_msg_count * 2):
            try:
                msgs.get(timeout=4)  # a timeout lets Empty be raised instead of blocking forever
            except Empty:
                assertions(False, "Failed to process all messages correctly.")
    def run_external_transform(self):
        '''
        This example script illustrates how a transform can interact with an outside process (very basic).
        It launches an external_transform example which uses the operating system command 'bc' to add 1 to the input.

        Producer -> A -> 'FS.TEMP/transform_output'
        A is an external transform that spawns an OS process to increment the input by 1
        '''
        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        tms_cli = TransformManagementServiceClient(node=self.container.node)
        procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        #-------------------------------
        # Process Definition
        #-------------------------------
        process_definition = ProcessDefinition(
            name='external_transform_definition')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.transform_example'
        process_definition.executable['class'] = 'ExternalTransform'
        process_definition_id = procd_cli.create_process_definition(
            process_definition=process_definition)

        #-------------------------------
        # Streams
        #-------------------------------

        input_stream_id = pubsub_cli.create_stream(name='input_stream',
                                                   original=True)

        #-------------------------------
        # Subscription
        #-------------------------------

        query = StreamQuery(stream_ids=[input_stream_id])
        input_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='input_queue')

        #-------------------------------
        # Launch Transform
        #-------------------------------

        transform_id = tms_cli.create_transform(
            name='external_transform',
            in_subscription_id=input_subscription_id,
            process_definition_id=process_definition_id,
            configuration={})
        tms_cli.activate_transform(transform_id)

        #-------------------------------
        # Launch Producer
        #-------------------------------

        id_p = self.container.spawn_process(
            'myproducer', 'ion.processes.data.transforms.transform_example',
            'TransformExampleProducer', {
                'process': {
                    'type': 'stream_process',
                    'publish_streams': {
                        'out_stream': input_stream_id
                    }
                },
                'stream_producer': {
                    'interval': 4000
                }
            })
        self.container.proc_manager.procs[id_p].start()
    def test_raw_stream_integration(self):
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there was one stream definition...
        ###
        # create a stream definition for the data from the ctd simulator
        raw_ctd_stream_def = SBE37_RAW_stream_definition()
        raw_ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=raw_ctd_stream_def, name='Simulated RAW CTD data')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.raw_stream_publisher',
            'class': 'RawStreamPublisher'
        }

        raw_ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        raw_ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=raw_ctd_stream_def_id)

        # Set up the datasets
        raw_ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=raw_ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        raw_ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=raw_ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': raw_ctd_stream_id,
            }
        }
        raw_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=raw_ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        raw_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                raw_ctd_stream_id,
            ]),
            exchange_name='raw_test',
            name="test raw subscription",
        )

        # this is okay - even in cei mode!
        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            # Heads
            log.warn('Raw data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='raw_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=raw_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow and parse the messages...
        process_dispatcher.cancel_process(
            raw_sim_pid
        )  # kill the ctd simulator process - that is enough data

        gevent.sleep(1)

        for message in results:

            sha1 = message.identifiables['stream_encoding'].sha1

            data = message.identifiables['data_stream'].values

            filename = FileSystem.get_hierarchical_url(FS.CACHE, sha1, ".raw")

            with open(filename, 'r') as f:

                assertions(data == f.read())
Exemple #22
0
def instrument_test_driver(container):

    org_client = OrgManagementServiceClient(node=container.node)
    id_client = IdentityManagementServiceClient(node=container.node)

    system_actor = id_client.find_actor_identity_by_name(name=CFG.system.system_actor)
    log.info('system actor:' + system_actor._id)

    sa_header_roles = get_role_message_headers(org_client.find_all_roles_by_user(system_actor._id))


    # Names of agent data streams to be configured.
    parsed_stream_name = 'ctd_parsed'
    raw_stream_name = 'ctd_raw'

    # Driver configuration.
    #Simulator

    driver_config = {
        'svr_addr': 'localhost',
        'cmd_port': 5556,
        'evt_port': 5557,
        'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
        'dvr_cls': 'SBE37Driver',
        'comms_config': {
            SBE37Channel.CTD: {
                'method':'ethernet',
                'device_addr': CFG.device.sbe37.host,
                'device_port': CFG.device.sbe37.port,
                'server_addr': 'localhost',
                'server_port': 8888
            }
        }
    }

    #Hardware

    _container_client = ContainerAgentClient(node=container.node,
        name=container.name)

    # Create a pubsub client to create streams.
    _pubsub_client = PubsubManagementServiceClient(node=container.node)

    # A callback for processing subscribed-to data.
    def consume(message, headers):
        log.info('Subscriber received message: %s', str(message))

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=container,
        node=container.node)

    subs = []

    # Create streams for each stream named in driver.
    stream_config = {}
    for (stream_name, val) in PACKET_CONFIG.iteritems():
        stream_def = ctd_stream_definition(stream_id=None)
        stream_def_id = _pubsub_client.create_stream_definition(
            container=stream_def)
        stream_id = _pubsub_client.create_stream(
            name=stream_name,
            stream_definition_id=stream_def_id,
            original=True,
            encoding='ION R2', headers={'ion-actor-id': system_actor._id, 'ion-actor-roles': sa_header_roles })
        stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name, callback=consume)
        sub.start()
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = _pubsub_client.create_subscription(\
            query=query, exchange_name=exchange_name )
        _pubsub_client.activate_subscription(sub_id)
        subs.append(sub)


    # Create agent config.

    agent_resource_id = '123xyz'

    agent_config = {
        'driver_config' : driver_config,
        'stream_config' : stream_config,
        'agent'         : {'resource_id': agent_resource_id}
    }

    # Launch an instrument agent process.
    _ia_name = 'agent007'
    _ia_mod = 'ion.agents.instrument.instrument_agent'
    _ia_class = 'InstrumentAgent'
    _ia_pid = _container_client.spawn_process(name=_ia_name,
        module=_ia_mod, cls=_ia_class,
        config=agent_config)


    log.info('got pid=%s for resource_id=%s' % (str(_ia_pid), str(agent_resource_id)))
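
    # Hedged follow-up sketch (not part of the original driver): once the agent process
    # is up it could be commanded through a resource agent client, mirroring the
    # streaming test below. ResourceAgentClient, AgentCommand and FakeProcess are
    # assumed to be importable here as they are in that test.
    _ia_client = ResourceAgentClient(agent_resource_id, process=FakeProcess())
    cmd = AgentCommand(command='initialize')
    retval = _ia_client.execute_agent(cmd)
    log.info('instrument agent initialize returned: %s', str(retval))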
    def test_activateInstrumentStream(self):

        # Create InstrumentModel
        instModel_obj = IonObject(RT.InstrumentModel,
                                  name='SBE37IMModel',
                                  description="SBE37IMModel",
                                  model_label="SBE37IMModel")
        try:
            instModel_id = self.imsclient.create_instrument_model(
                instModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentModel: %s" % ex)
        print 'new InstrumentModel id = ', instModel_id

        # Create InstrumentAgent
        instAgent_obj = IonObject(
            RT.InstrumentAgent,
            name='agent007',
            description="SBE37IMAgent",
            driver_module="ion.agents.instrument.instrument_agent",
            driver_class="InstrumentAgent")
        try:
            instAgent_id = self.imsclient.create_instrument_agent(
                instAgent_obj)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentAgent: %s" % ex)
        print 'new InstrumentAgent id = ', instAgent_id

        self.imsclient.assign_instrument_model_to_instrument_agent(
            instModel_id, instAgent_id)

        # Create InstrumentDevice
        log.debug(
            'test_activateInstrumentStream: Create instrument resource to represent the SBE37 (SA Req: L4-CI-SA-RQ-241) '
        )
        instDevice_obj = IonObject(RT.InstrumentDevice,
                                   name='SBE37IMDevice',
                                   description="SBE37IMDevice",
                                   serial_number="12345")
        try:
            instDevice_id = self.imsclient.create_instrument_device(
                instrument_device=instDevice_obj)
            self.imsclient.assign_instrument_model_to_instrument_device(
                instModel_id, instDevice_id)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentDevice: %s" % ex)

        log.debug(
            "test_activateInstrumentStream: new InstrumentDevice id = %s    (SA Req: L4-CI-SA-RQ-241) ",
            instDevice_id)

        driver_config = {
            'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'workdir': '/tmp/',
        }

        instAgentInstance_obj = IonObject(
            RT.InstrumentAgentInstance,
            name='SBE37IMAgentInstance',
            description="SBE37IMAgentInstance",
            driver_config=driver_config,
            comms_device_address='sbe37-simulator.oceanobservatories.org',
            comms_device_port=4001,
            port_agent_work_dir='/tmp/',
            port_agent_delimeter=['<<', '>>'])
        instAgentInstance_id = self.imsclient.create_instrument_agent_instance(
            instAgentInstance_obj, instAgent_id, instDevice_id)

        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            container=ctd_stream_def)

        log.debug(
            'test_activateInstrumentStream new Stream Definition id = %s',
            ctd_stream_def_id)

        log.debug(
            'test_activateInstrumentStream Creating new CDM data product with a stream definition'
        )
        dp_obj = IonObject(RT.DataProduct,
                           name='the parsed data',
                           description='ctd stream test')
        try:
            data_product_id1 = self.dpclient.create_data_product(
                dp_obj, ctd_stream_def_id)
        except BadRequest as ex:
            self.fail("failed to create new data product: %s" % ex)
        log.debug('test_activateInstrumentStream new dp_id = %s',
                  str(data_product_id1))

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=data_product_id1)

        self.dpclient.activate_data_product_persistence(
            data_product_id=data_product_id1,
            persist_data=True,
            persist_metadata=True)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1,
                                                   PRED.hasStream, None, True)
        log.debug('test_activateInstrumentStream Data product streams1 = %s',
                  str(stream_ids))

        simdata_subscription_id = self.pubsubcli.create_subscription(
            query=StreamQuery([stream_ids[0]]),
            exchange_name='Sim_data_queue',
            name='SimDataSubscription',
            description='SimData SubscriptionDescription')

        def simdata_message_received(message, headers):
            input = str(message)
            log.debug("test_activateInstrumentStream: granule received: %s",
                      input)

        subscriber_registrar = StreamSubscriberRegistrar(
            process=self.container, node=self.container.node)
        simdata_subscriber = subscriber_registrar.create_subscriber(
            exchange_name='Sim_data_queue', callback=simdata_message_received)

        # Start subscribers
        simdata_subscriber.start()

        # Activate subscriptions
        self.pubsubcli.activate_subscription(simdata_subscription_id)

        log.debug(
            'test_activateInstrumentStream Creating new RAW data product with a stream definition'
        )
        raw_stream_def = SBE37_RAW_stream_definition()
        raw_stream_def_id = self.pubsubcli.create_stream_definition(
            container=raw_stream_def)

        dp_obj = IonObject(RT.DataProduct,
                           name='the raw data',
                           description='raw stream test')
        try:
            data_product_id2 = self.dpclient.create_data_product(
                dp_obj, raw_stream_def_id)
        except BadRequest as ex:
            self.fail("failed to create new data product: %s" % ex)
        log.debug('test_activateInstrumentStream new dp_id = %s',
                  str(data_product_id2))

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=data_product_id2)

        self.dpclient.activate_data_product_persistence(
            data_product_id=data_product_id2,
            persist_data=True,
            persist_metadata=True)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id2,
                                                   PRED.hasStream, None, True)
        log.debug('test_activateInstrumentStream Data product streams2 = %s',
                  str(stream_ids))

        self.imsclient.start_instrument_agent_instance(
            instrument_agent_instance_id=instAgentInstance_id)

        inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(
            instAgentInstance_id)
        log.debug(
            'test_activateInstrumentStream Instrument agent instance obj: = %s',
            str(inst_agent_instance_obj))

        # Start a resource agent client to talk with the instrument agent.
        #self._ia_client = ResourceAgentClient('123xyz', name=inst_agent_instance_obj.agent_process_id,  process=FakeProcess())
        self._ia_client = ResourceAgentClient(instDevice_id,
                                              process=FakeProcess())
        log.debug('test_activateInstrumentStream: got ia client %s',
                  str(self._ia_client))
        log.debug("test_activateInstrumentStream: got ia client %s",
                  str(self._ia_client))

        cmd = AgentCommand(command='initialize')
        retval = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentStream: initialize %s", str(retval))

        time.sleep(2)

        log.debug(
            "test_activateInstrumentStream: Sending go_active command (L4-CI-SA-RQ-334)"
        )
        cmd = AgentCommand(command='go_active')
        reply = self._ia_client.execute_agent(cmd)
        log.debug(
            "test_activateInstrumentStream: return value from go_active %s",
            str(reply))
        time.sleep(2)
        cmd = AgentCommand(command='get_current_state')
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug(
            "test_activateInstrumentStream: current state after sending go_active command %s    (L4-CI-SA-RQ-334)",
            str(state))

        cmd = AgentCommand(command='run')
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrument: run %s", str(reply))
        time.sleep(2)

        log.debug("test_activateInstrumentStream: calling go_streaming ")
        cmd = AgentCommand(command='go_streaming')
        reply = self._ia_client.execute(cmd)
        log.debug("test_activateInstrumentStream: return from go_streaming %s",
                  str(reply))

        time.sleep(15)

        log.debug("test_activateInstrumentStream: calling go_observatory")
        cmd = AgentCommand(command='go_observatory')
        reply = self._ia_client.execute(cmd)
        log.debug(
            "test_activateInstrumentStream: return from go_observatory   %s",
            str(reply))
        time.sleep(2)

        log.debug("test_activateInstrumentStream: calling reset ")
        cmd = AgentCommand(command='reset')
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentStream: return from reset %s",
                  str(reply))
        time.sleep(2)

        #-------------------------------
        # Deactivate InstrumentAgentInstance
        #-------------------------------
        self.imsclient.stop_instrument_agent_instance(
            instrument_agent_instance_id=instAgentInstance_id)
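
        # Hedged cleanup sketch (not in the original test): deactivate the simulated-data
        # subscription and stop its subscriber, mirroring the teardown used in the other
        # examples in this document.
        self.pubsubcli.deactivate_subscription(simdata_subscription_id)
        simdata_subscriber.stop()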