Python DataAcquisitionManagementServiceClient.list_qc_references Examples

Programming Language: Python

Namespace/Package Name: interface.services.sa.idata_acquisition_management_service

Class/Type: DataAcquisitionManagementServiceClient

Method/Function: list_qc_references

Examples at hotexamples.com: 2

Python DataAcquisitionManagementServiceClient.list_qc_references - 2 examples found. These are the top rated real world Python examples of interface.services.sa.idata_acquisition_management_service.DataAcquisitionManagementServiceClient.list_qc_references extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

DataAcquisitionManagementServiceClient(30)

assign_data_product(30)

register_external_data_set(12)

create_external_dataset_agent_instance(11)

create_external_dataset_agent(11)

create_external_dataset(11)

create_external_data_provider(10)

create_data_source(9)

create_data_source_model(9)

assign_data_source_to_external_data_provider(7)

assign_data_source_to_data_model(7)

assign_external_dataset_to_data_source(7)

register_instrument(5)

start_external_dataset_agent_instance(4)

register_process(4)

stop_external_dataset_agent_instance(3)

read_external_data_provider(2)

force_delete_external_data_provider(2)

force_delete_external_dataset(2)

force_delete_external_dataset_agent(2)

read_data_source(2)

unassign_external_dataset_from_data_source(2)

read_external_dataset_agent_instance(2)

unregister_instrument(2)

unregister_external_data_set(2)

unassign_data_product(2)

unassign_data_source_from_data_model(2)

unassign_data_source_from_external_data_provider(2)

force_delete_data_source_model(2)

update_data_source(2)

delete_data_source(2)

create_external_dataset_model(2)

delete_external_dataset_agent(2)

delete_external_dataset(2)

assign_external_dataset_to_agent_instance(2)

delete_external_data_provider(2)

create_data_source_agent(2)

delete_data_source_model(2)

create_data_source_agent_instance(2)

force_delete_data_source(2)

unregister_process(1)

assign_data_product_source(1)

assign_eoi_resources(1)

unassign_external_dataset_from_agent_instance(1)

unassign_data_product_source(1)

create_external_data_agent_instance(1)

force_delete_data_source_agent_instance(1)

create_parser(1)

retrieve_external_dataset_agent_instance(1)

delete_data_source_agent(1)

Example #1

Show file

File: test_data_product_management_service_integration.py Project: ateranishi/coi-services

class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):

    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli           = DataProductManagementServiceClient()
        self.rrclient           = ResourceRegistryServiceClient()
        self.damsclient         = DataAcquisitionManagementServiceClient()
        self.pubsubcli          = PubsubManagementServiceClient()
        self.ingestclient       = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc               = UserNotificationServiceClient()
        self.data_retriever     = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------

        datastore_name = CACHE_DATASTORE_NAME
        self.db = self.container.datastore_manager.get_datastore(datastore_name)
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        self.process_definitions  = {}
        ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module':'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class' :'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space       = 'science_granule_ingestion'
        self.exchange_point       = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(self.process_definitions['ingestion_worker'],configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        for pid in self.pids:
            log.debug("number of pids to be terminated: %s", len(self.pids))
            try:
                self.process_dispatcher.cancel_process(pid)
                log.debug("Terminated the process: %s", pid)
            except:
                log.debug("could not terminate the process id: %s" % pid)
        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore


    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------

        # Generic time-series data domain creation
        tdom, sdom = time_series_domain()



        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain = tdom.dump(), 
            spatial_domain = sdom.dump())

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product( data_product= dp_obj,
                                            stream_definition_id=ctd_stream_def_id)
        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertNotEquals(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)
        self.assertNotEquals(len(stream_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)
        #------------------------------------------------------------------------------------------------
        # test creating a new data product with  a stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
            name='DP2',
            description='some new dp',
            temporal_domain = tdom.dump(),
            spatial_domain = sdom.dump())

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s' % dp_id2)

        #------------------------------------------------------------------------------------------------
        #make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s" % s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s" % sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)


        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        update_result = self.dpsc_cli.update_data_product(dp_obj)


        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEquals(dp_obj.description,'the very first dp')
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        #test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)
        #log.debug("test_create_data_product: parameters %s" % extended_product.computed.parameters.value)


        def ion_object_encoder(obj):
            return obj.__dict__


        #test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 0)
        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 0)

        #test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)

        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 1)

        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['StreamDefinition'].associated_resources[0].s, dp_id)

        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['Dataset'].associated_resources[0].s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s" % dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEquals(len(stream_ids), 0)
        self.assertEquals(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s" % event.ts_created)

        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain = tdom,
            spatial_domain = sdom)
        dp_id = self.dpsc_cli.create_data_product(data_product= dp_obj,
            stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEquals(ctd_stream_def_id, stream_def_id)


    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='Instrument DP', temporal_domain=tdom.dump(), spatial_domain=sdom.dump())
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)


        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]
        
        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        tempwat_dp = DataProduct(name='TEMPWAT')
        tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted with
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time','temp']))


    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain = tdom,
            spatial_domain = sdom)

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(data_product= dp_obj,
            stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]


        # Check that the streams associated with the data product are persisted with
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name,'DP1')
        self.assertEquals(data_product_object.description,'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'" % (dp_obj.name, dp_obj.description,data_product_object.name,
                                                           data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)


        dataset_modified.clear()

        rdt['time'] = np.arange(20,40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))


        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)

    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc,_ = self.rrclient.create_association(subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)
            
        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a':2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))


        svm.stored_value_cas('updated_document', {'offset_a':3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2) # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'],np.array([20.,20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0,23.0]))

Example #2

Show file

File: test_data_product_management_service_integration.py Project: sfoley/coi-services

class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):
    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------

        datastore_name = CACHE_DATASTORE_NAME
        self.db = self.container.datastore_manager.get_datastore(
            datastore_name)
        self.stream_def_id = self.pubsubcli.create_stream_definition(
            name='SBE37_CDM')

        self.process_definitions = {}
        ingestion_worker_definition = ProcessDefinition(
            name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module':
            'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(
            process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        for pid in self.pids:
            log.debug("number of pids to be terminated: %s", len(self.pids))
            try:
                self.process_dispatcher.cancel_process(pid)
                log.debug("Terminated the process: %s", pid)
            except:
                log.debug("could not terminate the process id: %s" % pid)
        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data',
            parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------

        # Generic time-series data domain creation
        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp',
                           temporal_domain=tdom.dump(),
                           spatial_domain=sdom.dump())

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)
        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        self.assertNotEquals(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id,
                                                   PRED.hasStreamDefinition,
                                                   RT.StreamDefinition, True)
        self.assertNotEquals(len(stream_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)
        #------------------------------------------------------------------------------------------------
        # test creating a new data product with  a stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
                           name='DP2',
                           description='some new dp',
                           temporal_domain=tdom.dump(),
                           spatial_domain=sdom.dump())

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s' % dp_id2)

        #------------------------------------------------------------------------------------------------
        #make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream,
                                                RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s" % s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition,
                                                  RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s" % sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        update_result = self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEquals(dp_obj.description, 'the very first dp')
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        #test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(
            ComputedValueAvailability.PROVIDED,
            extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(
            0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)

        #log.debug("test_create_data_product: parameters %s" % extended_product.computed.parameters.value)

        def ion_object_encoder(obj):
            return obj.__dict__

        #test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].resources),
            2)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].
                associated_resources), 0)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].associated_resources
                ), 0)

        #test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].resources),
            2)

        self.assertEqual(
            len(data_product_data.associations['Dataset'].resources), 1)

        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].
                associated_resources), 1)
        self.assertEqual(
            data_product_data.associations['StreamDefinition'].
            associated_resources[0].s, dp_id)

        self.assertEqual(
            len(data_product_data.associations['Dataset'].associated_resources
                ), 1)
        self.assertEqual(
            data_product_data.associations['Dataset'].associated_resources[0].
            s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s" % dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEquals(len(stream_ids), 0)
        self.assertEquals(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s" % event.ts_created)

        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp',
                           temporal_domain=tdom,
                           spatial_domain=sdom)
        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEquals(ctd_stream_def_id, stream_def_id)

    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition,
                        ctd_stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='Instrument DP',
                         temporal_domain=tdom.dump(),
                         spatial_domain=sdom.dump())
        dp_id = self.dpsc_cli.create_data_product(
            dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT')
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        self.dpsc_cli.activate_data_product_persistence(tempwat_dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence,
                        tempwat_dp_id)
        # Check that the streams associated with the data product are persisted with
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id,
                                                            PRED.hasDataset,
                                                            id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time', 'temp']))

    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp',
                           temporal_domain=tdom,
                           spatial_domain=sdom)

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted with
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug(
            "The data retriever was able to replay the dataset that was attached to the data product "
            "we wanted to be persisted. Therefore the data product was indeed persisted with "
            "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
            "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'"
        )

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name, 'DP1')
        self.assertEquals(data_product_object.description, 'some new dp')

        log.debug(
            "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
            " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
            "resource registry, name='%s', desc='%s'" %
            (dp_obj.name, dp_obj.description, data_product_object.name,
             data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        rdt['time'] = np.arange(20, 40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                    PRED.hasDataset,
                                                    id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)

    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition(
            'lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(
            data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = [
            'offset_document'
        ]
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc, _ = self.rrclient.create_association(
            subject=data_product_id,
            object=data_producer_id,
            predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)

        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a': 2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                   predicate=PRED.hasStream,
                                                   id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0]))

        svm.stored_value_cas('updated_document', {'offset_a': 3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id,
                         reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2)  # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0, 23.0]))