def test_data_product_search(self):
    # Create the dataproduct
    dp = DataProduct(name='test_product')
    dp.data_format.name = 'test_signal'
    dp.data_format.description = 'test signal'
    dp.data_format.character_set = 'utf8'
    dp.data_format.nominal_sampling_rate_maximum = '44000'
    dp.data_format.nominal_sampling_rate_minimum = '44000'
    dp.data_product_level = 'basic'
    dp_id, _ = self.rr.create(dp)

    search_string = "search 'data_format.name' is 'test_signal' from 'data_products_index'"
    results = self.poll(9, self.discovery.parse, search_string)
    self.assertIsNotNone(results, 'Results not found')
    self.assertTrue(results[0]['_id'] == dp_id)

    search_string = "search 'data_product_level' is 'basic' from 'data_products_index'"
    results = self.poll(9, self.discovery.parse, search_string)
    self.assertIsNotNone(results, 'Results not found')
    self.assertTrue(results[0]['_id'] == dp_id)

    search_string = "search 'data_format.character_set' is 'utf8' from 'data_products_index'"
    results = self.poll(9, self.discovery.parse, search_string)
    self.assertIsNotNone(results, 'Results not found')
    self.assertTrue(results[0]['_id'] == dp_id)

def test_data_product_search(self):
    # Create the dataproduct
    dp = DataProduct(name='test_product')
    dp.data_format.name = 'test_signal'
    dp.data_format.description = 'test signal'
    dp.data_format.character_set = 'utf8'
    dp.data_format.nominal_sampling_rate_maximum = '44000'
    dp.data_format.nominal_sampling_rate_minimum = '44000'
    dp.CDM_data_type = 'basic'
    dp_id, _ = self.rr.create(dp)

    search_string = "search 'data_format.name' is 'test_signal' from 'data_products_index'"
    results = self.poll(9, self.discovery.parse, search_string)
    self.assertIsNotNone(results, 'Results not found')
    self.assertTrue(results[0]['_id'] == dp_id)

    search_string = "search 'CDM_data_type' is 'basic' from 'data_products_index'"
    results = self.poll(9, self.discovery.parse, search_string)
    self.assertIsNotNone(results, 'Results not found')
    self.assertTrue(results[0]['_id'] == dp_id)

    search_string = "search 'data_format.character_set' is 'utf8' from 'data_products_index'"
    results = self.poll(9, self.discovery.parse, search_string)
    self.assertIsNotNone(results, 'Results not found')
    self.assertTrue(results[0]['_id'] == dp_id)

def test_data_product_subscription(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='ctd parsed')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id])
    self.addCleanup(self.pubsub_management.delete_subscription, subscription_id)

    validated = Event()

    def validation(msg, route, stream_id):
        validated.set()

    stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
    dp_stream_id = stream_ids.pop()

    validator = StandaloneStreamSubscriber('validator', callback=validation)
    validator.start()
    self.addCleanup(validator.stop)

    self.pubsub_management.activate_subscription(subscription_id)
    self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id)

    route = self.pubsub_management.read_stream_route(dp_stream_id)
    publisher = StandaloneStreamPublisher(dp_stream_id, route)
    publisher.publish('hi')
    self.assertTrue(validated.wait(10))

def setup_resource(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='example')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)

    # Yield to other greenlets, had an issue with connectivity
    gevent.sleep(1)

    self.offering_id = dataset_id

def test_pydap(self):
    if not CFG.get_safe('bootstrap.use_pydap', False):
        raise unittest.SkipTest('PyDAP is off (bootstrap.use_pydap)')
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='example')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 10)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))
    gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

    pydap_host = CFG.get_safe('server.pydap.host', 'localhost')
    pydap_port = CFG.get_safe('server.pydap.port', 8001)
    url = 'http://%s:%s/%s' % (pydap_host, pydap_port, dataset_id)
    ds = open_url(url)

    np.testing.assert_array_equal(ds['time'][:], np.arange(10))
    untested = []
    for k, v in rdt.iteritems():
        if k == rdt.temporal_parameter:
            continue
        context = rdt.context(k)
        if isinstance(context.param_type, QuantityType):
            np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
        elif isinstance(context.param_type, ArrayType):
            values = np.empty(rdt[k].shape, dtype='O')
            for i, obj in enumerate(rdt[k]):
                values[i] = str(obj)
            np.testing.assert_array_equal(ds[k][k][:][0], values)
        elif isinstance(context.param_type, ConstantType):
            np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
        elif isinstance(context.param_type, CategoryType):
            np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
        else:
            untested.append('%s (%s)' % (k, context.param_type))
    if untested:
        raise AssertionError('Untested parameters: %s' % untested)

def make_data_product(self, pdict_name, dp_name, available_fields=[]):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(pdict_name, id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('%s stream_def' % dp_name, parameter_dictionary_id=pdict_id, available_fields=available_fields or None)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    tdom = tdom.dump()
    sdom = sdom.dump()
    dp_obj = DataProduct(name=dp_name)
    dp_obj.temporal_domain = tdom
    dp_obj.spatial_domain = sdom

    data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
    return data_product_id

def test_qc_attachment(self):
    instrument_device = InstrumentDevice(name='whatever')
    instrument_device_id, _ = self.rrclient.create(instrument_device)
    self.addCleanup(self.rrclient.delete, instrument_device_id)

    self.client.register_instrument(instrument_device_id)
    self.addCleanup(self.client.unregister_instrument, instrument_device_id)

    dp = DataProduct(name='instrument output')
    dp_id, _ = self.rrclient.create(dp)
    self.addCleanup(self.rrclient.delete, dp_id)

    parser_id = self.make_grt_parser()

    attachment = Attachment(name='qc ref', attachment_type=AttachmentType.REFERENCE, content=global_range_test_document, context=ReferenceAttachmentContext(parser_id=parser_id))
    att_id = self.rrclient.create_attachment(dp_id, attachment)
    self.addCleanup(self.rrclient.delete_attachment, att_id)

    attachment2 = Attachment(name='qc ref2', attachment_type=AttachmentType.REFERENCE, content=global_range_test_document2, context=ReferenceAttachmentContext(parser_id=parser_id))
    att2_id = self.rrclient.create_attachment(dp_id, attachment2)
    self.addCleanup(self.rrclient.delete_attachment, att2_id)

    self.client.assign_data_product(instrument_device_id, dp_id)
    self.addCleanup(self.client.unassign_data_product, instrument_device_id, dp_id)

    svm = StoredValueManager(self.container)
    doc = svm.read_value('grt_CE01ISSM-MF005-01-CTDBPC999_TEMPWAT')
    np.testing.assert_array_almost_equal(doc['grt_min_value'], -2.)

def load_data_product(self):
    dset_i = 0
    dataset_management = DatasetManagementServiceClient()
    pubsub_management = PubsubManagementServiceClient()
    data_product_management = DataProductManagementServiceClient()
    resource_registry = self.container.instance.resource_registry

    dp_obj = DataProduct(
        name='instrument_data_product_%i' % dset_i,
        description='ctd stream test',
        processing_level_code='Parsed_Canonical')

    pdict_id = dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = pubsub_management.create_stream_definition(name='parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(pubsub_management.delete_stream_definition, stream_def_id)

    data_product_id = data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
    self.addCleanup(data_product_management.delete_data_product, data_product_id)

    data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(data_product_management.suspend_data_product_persistence, data_product_id)

    stream_ids, assocs = resource_registry.find_objects(subject=data_product_id, predicate='hasStream', id_only=True)
    stream_id = stream_ids[0]
    route = pubsub_management.read_stream_route(stream_id)

    dataset_ids, assocs = resource_registry.find_objects(subject=data_product_id, predicate='hasDataset', id_only=True)
    dataset_id = dataset_ids[0]

    return data_product_id, stream_id, route, stream_def_id, dataset_id

def test_get_data_from_FDW(self):
    # generate a data product and check that the FDW can get data
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='example')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))
    gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

    print "--------------------------------"
    print dataset_id
    coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
    print coverage_path
    print "--------------------------------"

    # verify table exists in the DB (similar to above)
    # ....code...

    # check that the geoserver layer exists as above
    # ... code ....

    # make a WMS/WFS request...something like this (or both)
    url = self.gs_host + '/geoserver/geonode/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonode:ooi_' + dataset_id + '_ooi&maxFeatures=1&outputFormat=csv'
    r = requests.get(url)
    self.assertTrue(r.status_code == 200)

def test_create_dataset_verify_geoserver_layer(self):
    # generate layer and check that the service created it in geoserver
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='example')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))
    gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

    log.debug("--------------------------------")
    log.debug(dataset_id)
    coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
    log.debug(coverage_path)
    log.debug("--------------------------------")

    # verify that the layer exists in geoserver
    try:
        r = requests.get(self.gs_rest_url + '/layers/ooi_' + dataset_id + '_ooi.xml', auth=(self.username, self.PASSWORD))
        self.assertTrue(r.status_code == 200)
    except Exception as e:
        log.error("check service and layer exist...%s", e)
        self.assertTrue(False)

def make_dp(self, stream_def_id):
    stream_def = self.resource_registry.read(stream_def_id)
    dp_obj = DataProduct(name=stream_def.name,
                         description=stream_def.name,
                         processing_level_code='Parsed_Canonical')

    data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
    self.data_product_management.activate_data_product_persistence(data_product_id)
    return data_product_id

def test_get_data_from_FDW(self):
    # generate a data product and check that the FDW can get data
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    dp = DataProduct(name='example')
    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))
    gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

    print "--------------------------------"
    print dataset_id
    coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
    print coverage_path
    print "--------------------------------"

    # verify table exists in the DB (similar to above)
    # ....code...

    # check that the geoserver layer exists as above
    # ... code ....

    # make a WMS/WFS request...something like this (or both)
    url = self.gs_host + '/geoserver/geonode/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonode:ooi_' + dataset_id + '_ooi&maxFeatures=1&outputFormat=csv'
    r = requests.get(url)
    self.assertTrue(r.status_code == 200)

def test_create_dataset_verify_geoserver_layer(self):
    # generate layer and check that the service created it in geoserver
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    dp = DataProduct(name='example')
    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))
    gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

    log.debug("--------------------------------")
    log.debug(dataset_id)
    coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
    log.debug(coverage_path)
    log.debug("--------------------------------")

    # verify that the layer exists in geoserver
    try:
        r = requests.get(self.gs_rest_url + '/layers/ooi_' + dataset_id + '_ooi.xml', auth=(self.username, self.PASSWORD))
        self.assertTrue(r.status_code == 200)
    except Exception as e:
        log.error("check service and layer exist...%s", e)
        self.assertTrue(False)

def test_ownership_searching(self):
    # Create two data products so that there is competition to the search, one is parsed
    # (with conductivity as a parameter) and the other is raw
    dp = DataProduct(name='example dataproduct')
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
    stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
    tdom, sdom = time_series_domain()
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()
    dp_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, exchange_point='xp1')

    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_raw_param_dict')
    stream_def_id = self.pubsub_management.create_stream_definition('ctd raw', parameter_dictionary_id=pdict_id)
    dp = DataProduct(name='WRONG')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()
    self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, exchange_point='xp1')

    parameter_search = 'search "name" is "conductivity" from "resources_index"'
    results = self.poll(9, self.discovery.parse, parameter_search)
    param_id = results[0]['_id']

    data_product_search = 'search "name" is "*" from "data_products_index" and has "%s"' % param_id
    results = self.poll(9, self.discovery.parse, data_product_search)
    print results
    self.assertEquals(results[0], dp_id)

def create_data_product(self, name='', description='', stream_def_id=''):
    dp_obj = DataProduct(name=name,
                         description=description,
                         processing_level_code='Parsed_Canonical')

    data_product_id = self.data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)
    return data_product_id

def test_create_dataset(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='example')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))

    # Yield to other greenlets, had an issue with connectivity
    gevent.sleep(1)

    log.debug("--------------------------------")
    log.debug(dataset_id)
    coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
    log.debug(coverage_path)
    log.debug("--------------------------------")
    breakpoint(locals(), globals())

def create_data_product(self, name='', description='', stream_def_id=''):
    tdom, sdom = time_series_domain()
    tdom = tdom.dump()
    sdom = sdom.dump()
    dp_obj = DataProduct(
        name=name,
        description=description,
        processing_level_code='Parsed_Canonical',
        temporal_domain=tdom,
        spatial_domain=sdom)

    data_product_id = self.data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)
    return data_product_id

def make_dp(self, stream_def_id):
    tdom, sdom = time_series_domain()
    tdom = tdom.dump()
    sdom = sdom.dump()
    stream_def = self.resource_registry.read(stream_def_id)
    dp_obj = DataProduct(name=stream_def.name,
                         description=stream_def.name,
                         processing_level_code='Parsed_Canonical',
                         temporal_domain=tdom,
                         spatial_domain=sdom)

    data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
    self.data_product_management.activate_data_product_persistence(data_product_id)
    return data_product_id

def create_data_product(self, name, stream_def_id='', param_dict_name='', pdict_id='', stream_configuration=None):
    if not (stream_def_id or param_dict_name or pdict_id):
        raise AssertionError('Attempted to create a Data Product without a parameter dictionary')

    dp = DataProduct(name=name)

    stream_def_id = stream_def_id or self.create_stream_definition(
        '%s stream def' % name,
        parameter_dictionary_id=pdict_id or self.RR2.find_resource_by_name(RT.ParameterDictionary, param_dict_name, id_only=True))

    stream_config = stream_configuration or StreamConfiguration(stream_name='parsed_ctd', stream_type=StreamConfigurationType.PARSED)

    data_product_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, default_stream_configuration=stream_config)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
    return data_product_id

def make_data_product(self, pdict_name, dp_name, available_fields=None):
    self.pubsub_management = PubsubManagementServiceClient()
    if available_fields is None:
        available_fields = []
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(pdict_name, id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('%s stream_def' % dp_name, parameter_dictionary_id=pdict_id, available_fields=available_fields or None)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    dp_obj = DataProduct(name=dp_name)
    data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
    return data_product_id

def create_data_product(self, name, stream_def_id='', param_dict_name='', pdict_id=''):
    if not (stream_def_id or param_dict_name or pdict_id):
        raise AssertionError('Attempted to create a Data Product without a parameter dictionary')

    tdom, sdom = time_series_domain()
    dp = DataProduct(name=name,
                     spatial_domain=sdom.dump(),
                     temporal_domain=tdom.dump())

    stream_def_id = stream_def_id or self.create_stream_definition(
        '%s stream def' % name,
        parameter_dictionary_id=pdict_id or self.RR2.find_resource_by_name(RT.ParameterDictionary, param_dict_name, id_only=True))

    data_product_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
    return data_product_id

def test_create_dataset(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    dp = DataProduct(name='example')
    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))

    # Yield to other greenlets, had an issue with connectivity
    gevent.sleep(1)

    log.debug("--------------------------------")
    log.debug(dataset_id)
    coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
    log.debug(coverage_path)
    log.debug("--------------------------------")
    breakpoint(locals(), globals())

def test_fdt_created_during(self):
    # generate a data product and check that the FDT exists
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    dp = DataProduct(name='example')
    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))
    gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

    print "--------------------------------"
    print dataset_id
    coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
    print coverage_path
    print "--------------------------------"

def setup_resource(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    dp = DataProduct(name='example')
    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 100)
    ph.publish_rdt_to_data_product(data_product_id, rdt)

    # Yield to other greenlets, had an issue with connectivity
    gevent.sleep(1)

    self.offering_id = dataset_id

def test_add_parameter_to_data_product(self):
    #self.preload_ui()
    self.test_add_parameter_function()
    data_product_id = self.data_product_id
    stream_def_id = self.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)[0][0]
    pdict_id = self.resource_registry.find_objects(stream_def_id, PRED.hasParameterDictionary, id_only=True)[0][0]

    # Create a new data product that represents the L1 temp from the ctd simulator
    dp = DataProduct(name='CTD Simulator TEMPWAT L1', category=DataProductTypeEnum.DERIVED)
    stream_def_id = self.pubsub_management.create_stream_definition(name='tempwat_l1', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
    dp_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, parent_data_product_id=data_product_id)

    parameter_function = ParameterFunction(name='linear_corr',
                                           function_type=PFT.NUMEXPR,
                                           function='a * x + b',
                                           args=['x', 'a', 'b'])
    pf_id = self.dataset_management.create_parameter_function(parameter_function)

    dpd = DataProcessDefinition(name='linear_corr', description='Linear Correction')
    self.data_process_management.create_data_process_definition(dpd, pf_id)

    parameter = ParameterContext(name='temperature_corrected',
                                 parameter_type='function',
                                 parameter_function_id=pf_id,
                                 parameter_function_map={'x': 'temp', 'a': 1.03, 'b': 0.25},
                                 value_encoding='float32',
                                 units='deg_C',
                                 display_name='Temperature Corrected')
    p_id = self.dataset_management.create_parameter(parameter)

    # Add it to the parent or parsed data product
    self.data_product_management.add_parameter_to_data_product(p_id, data_product_id)

    # Then update the child's stream definition to include it
    stream_def = self.pubsub_management.read_stream_definition(stream_def_id)
    stream_def.available_fields.append('temperature_corrected')
    self.resource_registry.update(stream_def)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)

    # For some reason, it takes numerous seconds of yielding with gevent for the coverage to actually save...
    gevent.sleep(10)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)

    np.testing.assert_array_almost_equal(rdt['temperature_corrected'], np.arange(30, dtype=np.float32) * 1.03 + 0.25, decimal=5)

def test_lookup_values(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_lookups()
    stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

    data_product = DataProduct(name='lookup data product')
    tdom, sdom = time_series_domain()
    data_product.temporal_domain = tdom.dump()
    data_product.spatial_domain = sdom.dump()

    data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
    self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)

    data_producer = DataProducer(name='producer')
    data_producer.producer_context = DataProcessProducerContext()
    data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
    data_producer_id, _ = self.rrclient.create(data_producer)
    self.addCleanup(self.rrclient.delete, data_producer_id)

    assoc, _ = self.rrclient.create_association(subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer)
    self.addCleanup(self.rrclient.delete_association, assoc)

    document_keys = self.damsclient.list_qc_references(data_product_id)
    self.assertEquals(document_keys, ['offset_document'])

    svm = StoredValueManager(self.container)
    svm.stored_value_cas('offset_document', {'offset_a': 2.0})

    self.dpsc_cli.activate_data_product_persistence(data_product_id)
    dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
    dataset_id = dataset_ids[0]

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temp'] = [20.]
    granule = rdt.to_granule()

    stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
    stream_id = stream_ids[0]
    route = self.pubsubcli.read_stream_route(stream_id=stream_id)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(granule)
    self.assertTrue(dataset_monitor.event.wait(10))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
    np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))

    svm.stored_value_cas('updated_document', {'offset_a': 3.0})
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
    ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [1]
    rdt['temp'] = [20.]
    granule = rdt.to_granule()

    gevent.sleep(2)  # Yield so that the event goes through
    publisher.publish(granule)
    self.assertTrue(dataset_monitor.event.wait(10))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
    np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0, 23.0]))

def test_pydap(self):
    if not CFG.get_safe('bootstrap.use_pydap', False):
        raise unittest.SkipTest('PyDAP is off (bootstrap.use_pydap)')
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='example')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 10)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.event.wait(10))
    gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

    pydap_host = CFG.get_safe('server.pydap.host', 'localhost')
    pydap_port = CFG.get_safe('server.pydap.port', 8001)
    url = 'http://%s:%s/%s' % (pydap_host, pydap_port, dataset_id)

    for i in xrange(3):  # Do it three times to test that the cache doesn't corrupt the requests/responses
        ds = open_url(url)
        np.testing.assert_array_equal(ds['time'][:], np.arange(10))
        untested = []
        for k, v in rdt.iteritems():
            if k == rdt.temporal_parameter:
                continue
            context = rdt.context(k)
            if isinstance(context.param_type, QuantityType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            elif isinstance(context.param_type, ArrayType):
                if context.param_type.inner_encoding is None:
                    values = np.empty(rdt[k].shape, dtype='O')
                    for i, obj in enumerate(rdt[k]):
                        values[i] = str(obj)
                    np.testing.assert_array_equal(ds[k][k][:][0], values)
                elif len(rdt[k].shape) > 1:
                    values = np.empty(rdt[k].shape[0], dtype='O')
                    for i in xrange(rdt[k].shape[0]):
                        values[i] = ','.join(map(lambda x: str(x), rdt[k][i].tolist()))
            elif isinstance(context.param_type, ConstantType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            elif isinstance(context.param_type, CategoryType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            else:
                untested.append('%s (%s)' % (k, context.param_type))
        if untested:
            raise AssertionError('Untested parameters: %s' % untested)

def test_derived_data_product(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

    dp = DataProduct(name='Instrument DP')
    dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
    self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

    self.dpsc_cli.activate_data_product_persistence(dp_id)
    self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

    dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
    if not dataset_ids:
        raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
    dataset_id = dataset_ids[0]

    # Make the derived data product
    simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
    tempwat_dp = DataProduct(name='TEMPWAT', category=DataProductTypeEnum.DERIVED)
    tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
    self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)

    # Check that the streams associated with the data product are persisted with
    stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
    for stream_id in stream_ids:
        self.assertTrue(self.ingestclient.is_persisted(stream_id))
    stream_id = stream_ids[0]

    route = self.pubsubcli.read_stream_route(stream_id=stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    rdt['pressure'] = np.arange(20)

    publisher = StandaloneStreamPublisher(stream_id, route)

    dataset_modified = Event()

    def cb(*args, **kwargs):
        dataset_modified.set()

    es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    publisher.publish(rdt.to_granule())
    self.assertTrue(dataset_modified.wait(30))

    tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
    tempwat_dataset_id = tempwat_dataset_ids[0]
    granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    self.assertEquals(set(rdt.fields), set(['time', 'temp']))

def _setup_resources(self):
    # TODO: some or all of this (or some variation) should move to DAMS'

    # Build the test resources for the dataset
    dams_cli = DataAcquisitionManagementServiceClient()
    dpms_cli = DataProductManagementServiceClient()
    rr_cli = ResourceRegistryServiceClient()

    eda = ExternalDatasetAgent()
    eda_id = dams_cli.create_external_dataset_agent(eda)

    eda_inst = ExternalDatasetAgentInstance()
    eda_inst_id = dams_cli.create_external_dataset_agent_instance(eda_inst, external_dataset_agent_id=eda_id)

    # Create and register the necessary resources/objects

    # Create DataProvider
    dprov = ExternalDataProvider(institution=Institution(), contact=ContactInformation())
    dprov.contact.name = 'Christopher Mueller'
    dprov.contact.email = '*****@*****.**'

    # Create DataSource
    dsrc = DataSource(protocol_type='FILE', institution=Institution(), contact=ContactInformation())
    dsrc.connection_params['base_data_url'] = ''
    dsrc.contact.name = 'Tim Giguere'
    dsrc.contact.email = '*****@*****.**'

    # Create ExternalDataset
    ds_name = 'slocum_test_dataset'
    dset = ExternalDataset(name=ds_name, dataset_description=DatasetDescription(), update_description=UpdateDescription(), contact=ContactInformation())

    dset.dataset_description.parameters['dataset_path'] = 'test_data/ru05-2012-021-0-0-sbd.dat'
    dset.dataset_description.parameters['temporal_dimension'] = None
    dset.dataset_description.parameters['zonal_dimension'] = None
    dset.dataset_description.parameters['meridional_dimension'] = None
    dset.dataset_description.parameters['vertical_dimension'] = None
    dset.dataset_description.parameters['variables'] = [
        'c_wpt_y_lmc', 'sci_water_cond', 'm_y_lmc', 'u_hd_fin_ap_inflection_holdoff',
        'sci_m_present_time', 'm_leakdetect_voltage_forward', 'sci_bb3slo_b660_scaled',
        'c_science_send_all', 'm_gps_status', 'm_water_vx', 'm_water_vy', 'c_heading',
        'sci_fl3slo_chlor_units', 'u_hd_fin_ap_gain', 'm_vacuum', 'u_min_water_depth',
        'm_gps_lat', 'm_veh_temp', 'f_fin_offset', 'u_hd_fin_ap_hardover_holdoff',
        'c_alt_time', 'm_present_time', 'm_heading', 'sci_bb3slo_b532_scaled',
        'sci_fl3slo_cdom_units', 'm_fin', 'x_cycle_overrun_in_ms', 'sci_water_pressure',
        'u_hd_fin_ap_igain', 'sci_fl3slo_phyco_units', 'm_battpos', 'sci_bb3slo_b470_scaled',
        'm_lat', 'm_gps_lon', 'sci_ctd41cp_timestamp', 'm_pressure', 'c_wpt_x_lmc',
        'c_ballast_pumped', 'x_lmc_xy_source', 'm_lon', 'm_avg_speed', 'sci_water_temp',
        'u_pitch_ap_gain', 'm_roll', 'm_tot_num_inflections', 'm_x_lmc', 'u_pitch_ap_deadband',
        'm_final_water_vy', 'm_final_water_vx', 'm_water_depth', 'm_leakdetect_voltage',
        'u_pitch_max_delta_battpos', 'm_coulomb_amphr', 'm_pitch',
    ]

    # Create DataSourceModel
    dsrc_model = DataSourceModel(name='slocum_model')
    dsrc_model.model = 'SLOCUM'
    dsrc_model.data_handler_module = 'N/A'
    dsrc_model.data_handler_class = 'N/A'

    ## Run everything through DAMS
    ds_id = dams_cli.create_external_dataset(external_dataset=dset)
    ext_dprov_id = dams_cli.create_external_data_provider(external_data_provider=dprov)
    ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
    ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

    # Register the ExternalDataset
    dproducer_id = dams_cli.register_external_data_set(external_dataset_id=ds_id)

    # Or using each method
    dams_cli.assign_data_source_to_external_data_provider(data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
    dams_cli.assign_data_source_to_data_model(data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
    dams_cli.assign_external_dataset_to_data_source(external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
    dams_cli.assign_external_dataset_to_agent_instance(external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
    # dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=self.eda_id, agent_instance_id=self.eda_inst_id)

    # Generate the data product and associate it to the ExternalDataset
    dprod = DataProduct(name='slocum_parsed_product', description='parsed slocum product')
    dproduct_id = dpms_cli.create_data_product(data_product=dprod)

    dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id, create_stream=True)

    stream_id, assn = rr_cli.find_objects(subject=dproduct_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
    stream_id = stream_id[0]

    log.info('Created resources: {0}'.format({'ExternalDataset': ds_id, 'ExternalDataProvider': ext_dprov_id, 'DataSource': ext_dsrc_id, 'DataSourceModel': ext_dsrc_model_id, 'DataProducer': dproducer_id, 'DataProduct': dproduct_id, 'Stream': stream_id}))

    #CBM: Use CF standard_names
    ttool = TaxyTool()
    # Add a taxonomy set for each of the glider variables declared above
    for var_name in dset.dataset_description.parameters['variables']:
        ttool.add_taxonomy_set(var_name)
    #CBM: Eventually, probably want to group this crap somehow - not sure how yet...

    # Create the logger for receiving publications
    self.create_stream_and_logger(name='slocum', stream_id=stream_id)

    self.EDA_RESOURCE_ID = ds_id
    self.EDA_NAME = ds_name
    self.DVR_CONFIG['dh_cfg'] = {
        'TESTING': True,
        'stream_id': stream_id,
        'external_dataset_res': dset,
        'taxonomy': ttool.dump(),
        'data_producer_id': dproducer_id,  #CBM: Should this be put in the main body of the config - with mod & cls?
        'max_records': 20,
    }

def test_lookup_values(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_lookups()
    stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

    data_product = DataProduct(name='lookup data product')
    tdom, sdom = time_series_domain()
    data_product.temporal_domain = tdom.dump()
    data_product.spatial_domain = sdom.dump()

    data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
    self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)

    data_producer = DataProducer(name='producer')
    data_producer.producer_context = DataProcessProducerContext()
    data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
    data_producer_id, _ = self.rrclient.create(data_producer)
    self.addCleanup(self.rrclient.delete, data_producer_id)

    assoc, _ = self.rrclient.create_association(subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer)
    self.addCleanup(self.rrclient.delete_association, assoc)

    document_keys = self.damsclient.list_qc_references(data_product_id)
    self.assertEquals(document_keys, ['offset_document'])

    svm = StoredValueManager(self.container)
    svm.stored_value_cas('offset_document', {'offset_a': 2.0})

    self.dpsc_cli.activate_data_product_persistence(data_product_id)
    dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
    dataset_id = dataset_ids[0]

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temp'] = [20.]
    granule = rdt.to_granule()

    stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
    stream_id = stream_ids[0]
    route = self.pubsubcli.read_stream_route(stream_id=stream_id)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(granule)
    self.assertTrue(dataset_monitor.event.wait(10))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
    np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))

    svm.stored_value_cas('updated_document', {'offset_a': 3.0})
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
    ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [1]
    rdt['temp'] = [20.]
    granule = rdt.to_granule()

    gevent.sleep(2)  # Yield so that the event goes through
    publisher.publish(granule)
    self.assertTrue(dataset_monitor.event.wait(10))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
    np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0, 23.0]))

def test_get_data_product_provenance_report(self):
    # Create a test device
    device_obj = Device(name='Device1', description='test instrument site')
    device_id, _ = self.rrclient.create(device_obj)
    self.addCleanup(self.rrclient.delete, device_id)

    # Create a test DataProduct
    data_product1_obj = DataProduct(name='DataProduct1', description='test data product 1')
    data_product1_id, _ = self.rrclient.create(data_product1_obj)
    self.addCleanup(self.rrclient.delete, data_product1_id)

    # Create a test DataProcess
    data_process_obj = DataProcess(name='DataProcess', description='test data process')
    data_process_id, _ = self.rrclient.create(data_process_obj)
    self.addCleanup(self.rrclient.delete, data_process_id)

    # Create a second test DataProduct
    data_product2_obj = DataProduct(name='DataProduct2', description='test data product 2')
    data_product2_id, _ = self.rrclient.create(data_product2_obj)
    self.addCleanup(self.rrclient.delete, data_product2_id)

    # Create a test DataProducer
    data_producer_obj = DataProducer(name='DataProducer', description='test data producer')
    data_producer_id, rev = self.rrclient.create(data_producer_obj)

    # Link the DataProcess to the second DataProduct manually
    assoc_id, _ = self.rrclient.create_association(subject=data_process_id, predicate=PRED.hasInputProduct, object=data_product2_id)
    self.addCleanup(self.rrclient.delete_association, assoc_id)

    # Register the instrument and process. This links the device and the data process
    # with their own producers
    self.damsclient.register_instrument(device_id)
    self.addCleanup(self.damsclient.unregister_instrument, device_id)
    self.damsclient.register_process(data_process_id)
    self.addCleanup(self.damsclient.unregister_process, data_process_id)

    # Manually link the first DataProduct with the test DataProducer
    assoc_id, _ = self.rrclient.create_association(subject=data_product1_id, predicate=PRED.hasDataProducer, object=data_producer_id)

    # Get the DataProducer linked to the DataProcess (created in register_process above)
    # Associate that with DataProduct1's DataProducer
    data_process_producer_ids, _ = self.rrclient.find_objects(subject=data_process_id, predicate=PRED.hasDataProducer, object_type=RT.DataProducer, id_only=True)
    assoc_id, _ = self.rrclient.create_association(subject=data_process_producer_ids[0], predicate=PRED.hasParent, object=data_producer_id)
    self.addCleanup(self.rrclient.delete_association, assoc_id)

    # Get the DataProducer linked to the Device (created in register_instrument above)
    # Associate that with the DataProcess's DataProducer
    device_producer_ids, _ = self.rrclient.find_objects(subject=device_id, predicate=PRED.hasDataProducer, object_type=RT.DataProducer, id_only=True)
    assoc_id, _ = self.rrclient.create_association(subject=data_producer_id, predicate=PRED.hasParent, object=device_producer_ids[0])

    # Create the links between the Device, DataProducts, DataProcess, and all DataProducers
    self.damsclient.assign_data_product(input_resource_id=device_id, data_product_id=data_product1_id)
    self.addCleanup(self.damsclient.unassign_data_product, device_id, data_product1_id)
    self.damsclient.assign_data_product(input_resource_id=data_process_id, data_product_id=data_product2_id)
    self.addCleanup(self.damsclient.unassign_data_product, data_process_id, data_product2_id)

    # Traverse through the relationships to get the links between objects
    res = self.dpmsclient.get_data_product_provenance_report(data_product2_id)

    # Make sure there are four keys
    self.assertEqual(len(res.keys()), 4)

    parent_count = 0
    config_count = 0
    for v in res.itervalues():
        if 'parent' in v:
            parent_count += 1
        if 'config' in v:
            config_count += 1

    # Make sure there are three parents and four configs
    self.assertEqual(parent_count, 3)
    self.assertEqual(config_count, 4)

def _setup_resources(self): # TODO: some or all of this (or some variation) should move to DAMS' # Build the test resources for the dataset dams_cli = DataAcquisitionManagementServiceClient() dpms_cli = DataProductManagementServiceClient() rr_cli = ResourceRegistryServiceClient() eda = ExternalDatasetAgent() eda_id = dams_cli.create_external_dataset_agent(eda) eda_inst = ExternalDatasetAgentInstance() eda_inst_id = dams_cli.create_external_dataset_agent_instance( eda_inst, external_dataset_agent_id=eda_id) # Create and register the necessary resources/objects # Create DataProvider dprov = ExternalDataProvider(institution=Institution(), contact=ContactInformation()) dprov.contact.name = 'Christopher Mueller' dprov.contact.email = '*****@*****.**' # Create DataSource dsrc = DataSource(protocol_type='DAP', institution=Institution(), contact=ContactInformation()) dsrc.connection_params['base_data_url'] = '' dsrc.contact.name = 'Tim Giguere' dsrc.contact.email = '*****@*****.**' # Create ExternalDataset ds_name = 'usgs_test_dataset' dset = ExternalDataset(name=ds_name, dataset_description=DatasetDescription(), update_description=UpdateDescription(), contact=ContactInformation()) # The usgs.nc test dataset is a download of the R1 dataset found here: # http://thredds-test.oceanobservatories.org/thredds/dodsC/ooiciData/E66B1A74-A684-454A-9ADE-8388C2C634E5.ncml dset.dataset_description.parameters[ 'dataset_path'] = 'test_data/usgs.nc' dset.dataset_description.parameters['temporal_dimension'] = 'time' dset.dataset_description.parameters['zonal_dimension'] = 'lon' dset.dataset_description.parameters['meridional_dimension'] = 'lat' dset.dataset_description.parameters['vertical_dimension'] = 'z' dset.dataset_description.parameters['variables'] = [ 'water_temperature', 'streamflow', 'water_temperature_bottom', 'water_temperature_middle', 'specific_conductance', 'data_qualifier', ] # Create DataSourceModel dsrc_model = DataSourceModel(name='dap_model') dsrc_model.model = 'DAP' dsrc_model.data_handler_module = 'N/A' dsrc_model.data_handler_class = 'N/A' ## Run everything through DAMS ds_id = dams_cli.create_external_dataset(external_dataset=dset) ext_dprov_id = dams_cli.create_external_data_provider( external_data_provider=dprov) ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc) ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model) # Register the ExternalDataset dproducer_id = dams_cli.register_external_data_set( external_dataset_id=ds_id) # Or using each method dams_cli.assign_data_source_to_external_data_provider( data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id) dams_cli.assign_data_source_to_data_model( data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id) dams_cli.assign_external_dataset_to_data_source( external_dataset_id=ds_id, data_source_id=ext_dsrc_id) dams_cli.assign_external_dataset_to_agent_instance( external_dataset_id=ds_id, agent_instance_id=eda_inst_id) # dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=self.eda_id, agent_instance_id=self.eda_inst_id) # Generate the data product and associate it to the ExternalDataset dprod = DataProduct(name='usgs_parsed_product', description='parsed usgs product') dproduct_id = dpms_cli.create_data_product(data_product=dprod) dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id, create_stream=True) stream_id, assn = rr_cli.find_objects(subject=dproduct_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True) stream_id = stream_id[0] 
    log.info('Created resources: {0}'.format({
        'ExternalDataset': ds_id,
        'ExternalDataProvider': ext_dprov_id,
        'DataSource': ext_dsrc_id,
        'DataSourceModel': ext_dsrc_model_id,
        'DataProducer': dproducer_id,
        'DataProduct': dproduct_id,
        'Stream': stream_id
    }))

    # CBM: Use CF standard_names
    ttool = TaxyTool()
    ttool.add_taxonomy_set('time', 'time')
    ttool.add_taxonomy_set('lon', 'longitude')
    ttool.add_taxonomy_set('lat', 'latitude')
    ttool.add_taxonomy_set('z', 'water depth')
    ttool.add_taxonomy_set('water_temperature', 'average water temperature')
    ttool.add_taxonomy_set('water_temperature_bottom', 'water temperature at bottom of water column')
    ttool.add_taxonomy_set('water_temperature_middle', 'water temperature at middle of water column')
    ttool.add_taxonomy_set('streamflow', 'flow velocity of stream')
    ttool.add_taxonomy_set('specific_conductance', 'specific conductance of water')
    ttool.add_taxonomy_set('data_qualifier', 'data qualifier flag')
    ttool.add_taxonomy_set('coords', 'This group contains coordinate parameters')
    ttool.add_taxonomy_set('data', 'This group contains data parameters')

    # Create the logger for receiving publications
    self.create_stream_and_logger(name='usgs', stream_id=stream_id)

    self.EDA_RESOURCE_ID = ds_id
    self.EDA_NAME = ds_name
    self.DVR_CONFIG['dh_cfg'] = {
        'TESTING': True,
        'stream_id': stream_id,
        'taxonomy': ttool.dump(),
        'data_producer_id': dproducer_id,
        # CBM: Should this be put in the main body of the config - with mod & cls?
        'max_records': 4,
    }
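The dh_cfg dict assembled above is what the external dataset handler ultimately consumes; a minimal sketch of pulling those pieces back out on the handler side is shown below (the helper name and the fallback for max_records are assumptions, not part of the test):

def _unpack_dh_cfg(dh_cfg):
    # Hypothetical helper; the keys below are exactly the ones set in _setup_resources().
    stream_id = dh_cfg['stream_id']
    data_producer_id = dh_cfg['data_producer_id']
    taxonomy_str = dh_cfg['taxonomy']             # serialized TaxyTool, from ttool.dump()
    max_records = dh_cfg.get('max_records', 100)  # fallback value is an assumption
    return stream_id, data_producer_id, taxonomy_str, max_records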
def _setup_ncom(self):
    # TODO: some or all of this (or some variation) should move to DAMS

    # Create and register the necessary resources/objects
    eda = ExternalDatasetAgent()
    eda_id = self.dams_cli.create_external_dataset_agent(eda)

    eda_inst = ExternalDatasetAgentInstance()
    eda_inst_id = self.dams_cli.create_external_dataset_agent_instance(
        eda_inst, external_dataset_agent_id=eda_id)

    # Create DataProvider
    dprov = ExternalDataProvider(institution=Institution(),
                                 contact=ContactInformation())
    # dprov.institution.name = "OOI CGSN"
    dprov.contact.name = "Robert Weller"
    dprov.contact.email = "*****@*****.**"

    # Create DataSource
    dsrc = DataSource(protocol_type="DAP",
                      institution=Institution(),
                      contact=ContactInformation())
    # dsrc.connection_params["base_data_url"] = "http://ooi.whoi.edu/thredds/dodsC/"
    dsrc.connection_params["base_data_url"] = ""
    dsrc.contact.name = "Rich Signell"
    dsrc.contact.email = "*****@*****.**"

    # Create ExternalDataset
    dset = ExternalDataset(name="test",
                           dataset_description=DatasetDescription(),
                           update_description=UpdateDescription(),
                           contact=ContactInformation())
    # dset.dataset_description.parameters["dataset_path"] = "ooi/AS02CPSM_R_M.nc"
    dset.dataset_description.parameters["dataset_path"] = "test_data/ncom.nc"
    dset.dataset_description.parameters["temporal_dimension"] = "time"
    dset.dataset_description.parameters["zonal_dimension"] = "lon"
    dset.dataset_description.parameters["meridional_dimension"] = "lat"

    # Create DataSourceModel
    dsrc_model = DataSourceModel(name="dap_model")
    dsrc_model.model = "DAP"
    dsrc_model.data_handler_module = "eoi.agent.handler.dap_external_data_handler"
    dsrc_model.data_handler_class = "DapExternalDataHandler"

    ## Run everything through DAMS
    ds_id = self.ncom_ds_id = self.dams_cli.create_external_dataset(
        external_dataset=dset)
    ext_dprov_id = self.dams_cli.create_external_data_provider(
        external_data_provider=dprov)
    ext_dsrc_id = self.dams_cli.create_data_source(data_source=dsrc)
    ext_dsrc_model_id = self.dams_cli.create_data_source_model(dsrc_model)

    # Register the ExternalDataset
    dproducer_id = self.dams_cli.register_external_data_set(
        external_dataset_id=ds_id)

    ## Associate everything
    # Convenience method
    # self.dams_cli.assign_eoi_resources(external_data_provider_id=ext_dprov_id, data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id, external_dataset_id=ds_id, external_data_agent_id=eda_id, agent_instance_id=eda_inst_id)

    # Or using each method
    self.dams_cli.assign_data_source_to_external_data_provider(
        data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
    self.dams_cli.assign_data_source_to_data_model(
        data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
    self.dams_cli.assign_external_dataset_to_data_source(
        external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
    self.dams_cli.assign_external_dataset_to_agent_instance(
        external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
    # self.dams_cli.assign_external_dataset_agent_to_data_model(external_data_agent_id=eda_id, data_source_model_id=ext_dsrc_model_id)
    # self.dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=eda_id, agent_instance_id=eda_inst_id)

    # Generate the data product and associate it to the ExternalDataset
    dprod = DataProduct(name='ncom_product', description='raw ncom product')
    dproduct_id = self.dpms_cli.create_data_product(data_product=dprod)
    self.dams_cli.assign_data_product(input_resource_id=ds_id,
                                      data_product_id=dproduct_id,
                                      create_stream=True)
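The setup above ends with assign_data_product(create_stream=True); a hedged follow-up sketch, mirroring the registry lookup done in _setup_resources, could confirm that a Stream was actually attached (self.rr_cli as a ResourceRegistryServiceClient is an assumption here, not part of the original test):

# Sketch only - verify the stream created for the ncom data product.
stream_ids, _ = self.rr_cli.find_objects(subject=dproduct_id,
                                         predicate=PRED.hasStream,
                                         object_type=RT.Stream,
                                         id_only=True)
self.assertEqual(len(stream_ids), 1)  # exactly one Stream should hang off the data product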
def test_pydap(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()

    stream_def_id = self.pubsub_management.create_stream_definition("example", parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()

    dp = DataProduct(name="example")
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_rdt(rdt, 10)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(monitor.wait())

    gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

    pydap_host = CFG.get_safe("server.pydap.host", "localhost")
    pydap_port = CFG.get_safe("server.pydap.port", 8001)
    url = "http://%s:%s/%s" % (pydap_host, pydap_port, data_product_id)

    for i in xrange(3):  # Do it three times to test that the cache doesn't corrupt the requests/responses
        ds = open_url(url)
        np.testing.assert_array_equal(list(ds["data"]["time"]), np.arange(10))

        untested = []
        for k, v in rdt.iteritems():
            if k == rdt.temporal_parameter:
                continue
            context = rdt.context(k)
            if isinstance(context.param_type, QuantityType):
                np.testing.assert_array_equal(list(ds["data"][k]), rdt[k])
            elif isinstance(context.param_type, ArrayType):
                if context.param_type.inner_encoding is None:
                    values = np.empty(rdt[k].shape, dtype="O")
                    for i, obj in enumerate(rdt[k]):
                        values[i] = str(obj)
                    np.testing.assert_array_equal(list(ds["data"][k]), values)
                elif len(rdt[k].shape) > 1:
                    values = np.empty(rdt[k].shape[0], dtype="O")
                    for i in xrange(rdt[k].shape[0]):
                        values[i] = ",".join(map(lambda x: str(x), rdt[k][i].tolist()))
            elif isinstance(context.param_type, ConstantType):
                np.testing.assert_array_equal(list(ds["data"][k]), rdt[k])
            elif isinstance(context.param_type, CategoryType):
                np.testing.assert_array_equal(list(ds["data"][k]), rdt[k])
            else:
                untested.append("%s (%s)" % (k, context.param_type))

        if untested:
            raise AssertionError("Untested parameters: %s" % untested)
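Outside the test harness, the same pydap endpoint can be read with a plain client call; a minimal sketch follows (the host, port, and data product id are placeholders, and open_url is assumed to be pydap.client.open_url as used in the test above):

from pydap.client import open_url
import numpy as np

# Placeholder URL: substitute the real pydap host/port and a persisted data product id.
url = "http://localhost:8001/<data_product_id>"
ds = open_url(url)

# 'data' and 'time' follow the dataset structure asserted in test_pydap above.
time_values = np.asarray(list(ds["data"]["time"]))
print(time_values)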
def _setup_hfr(self):
    # TODO: some or all of this (or some variation) should move to DAMS

    # Create and register the necessary resources/objects
    eda = ExternalDatasetAgent()
    eda_id = self.dams_cli.create_external_dataset_agent(eda)

    eda_inst = ExternalDatasetAgentInstance()
    eda_inst_id = self.dams_cli.create_external_dataset_agent_instance(
        eda_inst, external_dataset_agent_id=eda_id)

    # Create DataProvider
    dprov = ExternalDataProvider(institution=Institution(),
                                 contact=ContactInformation())
    # dprov.institution.name = "HFR UCSD"

    # Create DataSource
    dsrc = DataSource(protocol_type="DAP",
                      institution=Institution(),
                      contact=ContactInformation())
    dsrc.connection_params["base_data_url"] = "http://hfrnet.ucsd.edu:8080/thredds/dodsC/"

    # Create ExternalDataset
    dset = ExternalDataset(name="UCSD HFR",
                           dataset_description=DatasetDescription(),
                           update_description=UpdateDescription(),
                           contact=ContactInformation())
    dset.dataset_description.parameters["dataset_path"] = "HFRNet/USEGC/6km/hourly/RTV"
    # dset.dataset_description.parameters["dataset_path"] = "test_data/hfr.nc"
    dset.dataset_description.parameters["temporal_dimension"] = "time"
    dset.dataset_description.parameters["zonal_dimension"] = "lon"
    dset.dataset_description.parameters["meridional_dimension"] = "lat"

    # Create DataSourceModel
    dsrc_model = DataSourceModel(name="dap_model")
    dsrc_model.model = "DAP"
    dsrc_model.data_handler_module = "eoi.agent.handler.dap_external_data_handler"
    dsrc_model.data_handler_class = "DapExternalDataHandler"

    ## Run everything through DAMS
    ds_id = self.hfr_ds_id = self.dams_cli.create_external_dataset(
        external_dataset=dset)
    ext_dprov_id = self.dams_cli.create_external_data_provider(
        external_data_provider=dprov)
    ext_dsrc_id = self.dams_cli.create_data_source(data_source=dsrc)
    ext_dsrc_model_id = self.dams_cli.create_data_source_model(dsrc_model)

    # Register the ExternalDataset
    dproducer_id = self.dams_cli.register_external_data_set(
        external_dataset_id=ds_id)

    ## Associate everything
    # Convenience method
    # self.dams_cli.assign_eoi_resources(external_data_provider_id=ext_dprov_id, data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id, external_dataset_id=ds_id, external_data_agent_id=eda_id, agent_instance_id=eda_inst_id)

    # Or using each method
    self.dams_cli.assign_data_source_to_external_data_provider(
        data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
    self.dams_cli.assign_data_source_to_data_model(
        data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
    self.dams_cli.assign_external_dataset_to_data_source(
        external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
    self.dams_cli.assign_external_dataset_to_agent_instance(
        external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
    # self.dams_cli.assign_external_dataset_agent_to_data_model(external_data_agent_id=eda_id, data_source_model_id=ext_dsrc_model_id)
    # self.dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=eda_id, agent_instance_id=eda_inst_id)

    # Generate the data product and associate it to the ExternalDataset
    dprod = DataProduct(name='hfr_product', description='raw hfr product')
    dproduct_id = self.dpms_cli.create_data_product(data_product=dprod)
    self.dams_cli.assign_data_product(input_resource_id=ds_id,
                                      data_product_id=dproduct_id,
                                      create_stream=True)