def test_data_product_search(self):

        # Create the data product
        dp = DataProduct(name='test_product')
        dp.data_format.name = 'test_signal'
        dp.data_format.description = 'test signal'
        dp.data_format.character_set = 'utf8'
        dp.data_format.nominal_sampling_rate_maximum = '44000'
        dp.data_format.nominal_sampling_rate_minimum = '44000'
        dp.data_product_level = 'basic'
        dp_id, _ = self.rr.create(dp)

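        # Query the discovery index on the nested data_format.name attribute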
        search_string = "search 'data_format.name' is 'test_signal' from 'data_products_index'"
        results = self.poll(9, self.discovery.parse, search_string)

        self.assertIsNotNone(results, 'Results not found')
        self.assertTrue(results[0]['_id'] == dp_id)

        search_string = "search 'data_product_level' is 'basic' from 'data_products_index'"
        results = self.poll(9, self.discovery.parse, search_string)

        self.assertIsNotNone(results, 'Results not found')
        self.assertTrue(results[0]['_id'] == dp_id)
        
        search_string = "search 'data_format.character_set' is 'utf8' from 'data_products_index'"
        results = self.poll(9, self.discovery.parse, search_string)

        self.assertIsNotNone(results, 'Results not found')
        self.assertTrue(results[0]['_id'] == dp_id)
    def test_data_product_search(self):

        # Create the data product
        dp = DataProduct(name='test_product')
        dp.data_format.name = 'test_signal'
        dp.data_format.description = 'test signal'
        dp.data_format.character_set = 'utf8'
        dp.data_format.nominal_sampling_rate_maximum = '44000'
        dp.data_format.nominal_sampling_rate_minimum = '44000'
        dp.CDM_data_type = 'basic'
        dp_id, _ = self.rr.create(dp)

        search_string = "search 'data_format.name' is 'test_signal' from 'data_products_index'"
        results = self.poll(9, self.discovery.parse, search_string)

        self.assertIsNotNone(results, 'Results not found')
        self.assertTrue(results[0]['_id'] == dp_id)

        search_string = "search 'CDM_data_type' is 'basic' from 'data_products_index'"
        results = self.poll(9, self.discovery.parse, search_string)

        self.assertIsNotNone(results, 'Results not found')
        self.assertTrue(results[0]['_id'] == dp_id)
        
        search_string = "search 'data_format.character_set' is 'utf8' from 'data_products_index'"
        results = self.poll(9, self.discovery.parse, search_string)

        self.assertIsNotNone(results, 'Results not found')
        self.assertTrue(results[0]['_id'] == dp_id)
Example #3
    def test_data_product_subscription(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()
        dp = DataProduct(name='ctd parsed')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription_id)

        validated = Event()
        def validation(msg, route, stream_id):
            validated.set()

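        # Look up the stream that was created for the data product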
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        dp_stream_id = stream_ids.pop()

        validator = StandaloneStreamSubscriber('validator', callback=validation)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_management.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id)

        route = self.pubsub_management.read_stream_route(dp_stream_id)

        publisher = StandaloneStreamPublisher(dp_stream_id, route)
        publisher.publish('hi')
        self.assertTrue(validated.wait(10))
    def setup_resource(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)

        # Yield to other greenlets, had an issue with connectivity
        gevent.sleep(1)

        self.offering_id = dataset_id
    def test_pydap(self):
        if not CFG.get_safe('bootstrap.use_pydap',False):
            raise unittest.SkipTest('PyDAP is off (bootstrap.use_pydap)')
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
        
        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt,10)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))


        gevent.sleep(1) # Yield to other greenlets, had an issue with connectivity

        pydap_host = CFG.get_safe('server.pydap.host','localhost')
        pydap_port = CFG.get_safe('server.pydap.port',8001)
        url = 'http://%s:%s/%s' %(pydap_host, pydap_port, dataset_id)

        ds = open_url(url)
        np.testing.assert_array_equal(ds['time'][:], np.arange(10))
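        # Check each non-temporal parameter served through PyDAP against the published values, by parameter type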
        untested = []
        for k,v in rdt.iteritems():
            if k==rdt.temporal_parameter:
                continue
            context = rdt.context(k)
            if isinstance(context.param_type, QuantityType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            elif isinstance(context.param_type, ArrayType):
                values = np.empty(rdt[k].shape, dtype='O')
                for i,obj in enumerate(rdt[k]):
                    values[i] = str(obj)
                np.testing.assert_array_equal(ds[k][k][:][0], values)
            elif isinstance(context.param_type, ConstantType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            elif isinstance(context.param_type, CategoryType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            else:
                untested.append('%s (%s)' % (k,context.param_type))
        if untested:
            raise AssertionError('Untested parameters: %s' % untested)
    def make_data_product(self, pdict_name, dp_name, available_fields=None):
        if available_fields is None:
            available_fields = []
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(pdict_name, id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('%s stream_def' % dp_name, parameter_dictionary_id=pdict_id, available_fields=available_fields or None)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(name=dp_name)
        dp_obj.temporal_domain = tdom
        dp_obj.spatial_domain = sdom
        data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
        return data_product_id
    def test_qc_attachment(self):
        instrument_device = InstrumentDevice(name='whatever')
        instrument_device_id,_ = self.rrclient.create(instrument_device)
        self.addCleanup(self.rrclient.delete, instrument_device_id)
        self.client.register_instrument(instrument_device_id)
        self.addCleanup(self.client.unregister_instrument, instrument_device_id)
        dp = DataProduct(name='instrument output')

        dp_id,_ = self.rrclient.create(dp)
        self.addCleanup(self.rrclient.delete, dp_id)

        parser_id = self.make_grt_parser()
        attachment = Attachment(name='qc ref', attachment_type=AttachmentType.REFERENCE,content=global_range_test_document, context=ReferenceAttachmentContext(parser_id=parser_id))
        att_id = self.rrclient.create_attachment(dp_id, attachment)
        self.addCleanup(self.rrclient.delete_attachment, att_id)

        attachment2 = Attachment(name='qc ref2', attachment_type=AttachmentType.REFERENCE, content=global_range_test_document2, context=ReferenceAttachmentContext(parser_id=parser_id))
        att2_id = self.rrclient.create_attachment(dp_id, attachment2)
        self.addCleanup(self.rrclient.delete_attachment, att2_id)

        self.client.assign_data_product(instrument_device_id, dp_id)
        self.addCleanup(self.client.unassign_data_product, instrument_device_id, dp_id)
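        # The QC reference documents should now be available through the stored value store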
        svm = StoredValueManager(self.container)
        doc = svm.read_value('grt_CE01ISSM-MF005-01-CTDBPC999_TEMPWAT')
        np.testing.assert_array_almost_equal(doc['grt_min_value'], -2.)
Example #8
    def load_data_product(self):
        dset_i = 0
        dataset_management      = DatasetManagementServiceClient()
        pubsub_management       = PubsubManagementServiceClient()
        data_product_management = DataProductManagementServiceClient()
        resource_registry       = self.container.instance.resource_registry
        dp_obj = DataProduct(
            name='instrument_data_product_%i' % dset_i,
            description='ctd stream test',
            processing_level_code='Parsed_Canonical')
        pdict_id = dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = pubsub_management.create_stream_definition(name='parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(pubsub_management.delete_stream_definition, stream_def_id)
        data_product_id = data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
        self.addCleanup(data_product_management.delete_data_product, data_product_id)
        data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(data_product_management.suspend_data_product_persistence, data_product_id)

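        # Resolve the stream, route, and dataset created for the data product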
        stream_ids, assocs = resource_registry.find_objects(subject=data_product_id, predicate='hasStream', id_only=True)
        stream_id = stream_ids[0]
        route = pubsub_management.read_stream_route(stream_id)

        dataset_ids, assocs = resource_registry.find_objects(subject=data_product_id, predicate='hasDataset', id_only=True)
        dataset_id = dataset_ids[0]

        return data_product_id, stream_id, route, stream_def_id, dataset_id
    def test_get_data_from_FDW(self):
        # generate a data product and check that the FDW can get data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(1) # Yield to other greenlets, had an issue with connectivity

        print "--------------------------------"
        print dataset_id
        coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
        print coverage_path
        print "--------------------------------"

        #verify table exists in the DB (similar to above)
        # ....code...

        # check that the geoserver layer exists as above
        # ... code ....

        # make a WMS/WFS request...something like this (or both)
        url = self.gs_host+'/geoserver/geonode/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonode:ooi_' + dataset_id + '_ooi&maxFeatures=1&outputFormat=csv'
        r = requests.get(url)
        self.assertTrue(r.status_code == 200)
    def test_create_dataset_verify_geoserver_layer(self):
        #generate layer and check that the service created it in geoserver
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(1) # Yield to other greenlets, had an issue with connectivity

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        # verify that the layer exists in geoserver
        try:
            r = requests.get(self.gs_rest_url + '/layers/ooi_' + dataset_id + '_ooi.xml', auth=(self.username, self.PASSWORD))
            self.assertTrue(r.status_code == 200)
        except Exception as e:
            log.error("check service and layer exist...%s", e)
            self.assertTrue(False)
Example #11
    def make_dp(self, stream_def_id):
        stream_def = self.resource_registry.read(stream_def_id)
        dp_obj = DataProduct(name=stream_def.name,
                             description=stream_def.name,
                             processing_level_code='Parsed_Canonical')

        data_product_id = self.data_product_management.create_data_product(
            dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        return data_product_id
Example #12
    def test_get_data_from_FDW(self):
        # generate a data product and check that the FDW can get data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(
            1)  # Yield to other greenlets, had an issue with connectivity

        print "--------------------------------"
        print dataset_id
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        print coverage_path
        print "--------------------------------"

        #verify table exists in the DB (similar to above)
        # ....code...

        # check that the geoserver layer exists as above
        # ... code ....

        # make a WMS/WFS request...something like this (or both)
        url = self.gs_host + '/geoserver/geonode/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonode:ooi_' + dataset_id + '_ooi&maxFeatures=1&outputFormat=csv'
        r = requests.get(url)
        self.assertTrue(r.status_code == 200)
Example #13
    def test_create_dataset_verify_geoserver_layer(self):
        #generate layer and check that the service created it in geoserver
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(
            1)  # Yield to other greenlets, had an issue with connectivity

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        # verify that the layer exists in geoserver
        try:
            r = requests.get(self.gs_rest_url + '/layers/ooi_' + dataset_id +
                             '_ooi.xml',
                             auth=(self.username, self.PASSWORD))
            self.assertTrue(r.status_code == 200)
        except Exception as e:
            log.error("check service and layer exist...%s", e)
            self.assertTrue(False)
    def test_ownership_searching(self):
        # Create two data products so the search has competing results: one parsed
        # (with conductivity as a parameter) and the other raw
        dp = DataProduct(name='example dataproduct')
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        tdom, sdom = time_series_domain()
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()
        dp_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, exchange_point='xp1')

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_raw_param_dict')
        stream_def_id = self.pubsub_management.create_stream_definition('ctd raw', parameter_dictionary_id=pdict_id)
        dp = DataProduct(name='WRONG')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()
        self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, exchange_point='xp1')

        parameter_search = 'search "name" is "conductivity" from "resources_index"'
        results = self.poll(9, self.discovery.parse, parameter_search)
        param_id = results[0]['_id']

        data_product_search = 'search "name" is "*" from "data_products_index" and has "%s"' % param_id
        results = self.poll(9, self.discovery.parse, data_product_search)
        print results
        self.assertEquals(results[0], dp_id)
    def create_data_product(self, name='', description='', stream_def_id=''):
        dp_obj = DataProduct(name=name,
                             description=description,
                             processing_level_code='Parsed_Canonical')

        data_product_id = self.data_product_management.create_data_product(
            data_product=dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)
        return data_product_id
    def test_create_dataset(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        # Yield to other greenlets, had an issue with connectivity
        gevent.sleep(1)

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        breakpoint(locals(), globals())
Example #17
    def create_data_product(self, name='', description='', stream_def_id=''):
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(
            name=name,
            description=description,
            processing_level_code='Parsed_Canonical',
            temporal_domain=tdom,
            spatial_domain=sdom)

        data_product_id = self.data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)
        return data_product_id
Example #18
    def make_dp(self, stream_def_id):
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        stream_def = self.resource_registry.read(stream_def_id)
        dp_obj = DataProduct(name=stream_def.name,
                             description=stream_def.name,
                             processing_level_code='Parsed_Canonical',
                             temporal_domain=tdom,
                             spatial_domain=sdom)

        data_product_id = self.data_product_management.create_data_product(
            dp_obj, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        return data_product_id
    def create_data_product(self,name, stream_def_id='', param_dict_name='', pdict_id='', stream_configuration=None):
        if not (stream_def_id or param_dict_name or pdict_id):
            raise AssertionError('Attempted to create a Data Product without a parameter dictionary')


        dp = DataProduct(name=name)

        stream_def_id = stream_def_id or self.create_stream_definition('%s stream def' % name, 
                parameter_dictionary_id=pdict_id or self.RR2.find_resource_by_name(RT.ParameterDictionary,
                    param_dict_name, id_only=True))


        stream_config = stream_configuration or StreamConfiguration(stream_name='parsed_ctd', stream_type=StreamConfigurationType.PARSED)

        data_product_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, default_stream_configuration=stream_config)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
        return data_product_id
    def make_data_product(self, pdict_name, dp_name, available_fields=None):
        self.pubsub_management = PubsubManagementServiceClient()
        if available_fields is None:
            available_fields = []
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            pdict_name, id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            '%s stream_def' % dp_name,
            parameter_dictionary_id=pdict_id,
            available_fields=available_fields or None)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)
        dp_obj = DataProduct(name=dp_name)
        data_product_id = self.data_product_management.create_data_product(
            dp_obj, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)
        return data_product_id
Example #21
    def create_data_product(self,name, stream_def_id='', param_dict_name='', pdict_id=''):
        if not (stream_def_id or param_dict_name or pdict_id):
            raise AssertionError('Attempted to create a Data Product without a parameter dictionary')

        tdom, sdom = time_series_domain()

        dp = DataProduct(name=name,
                spatial_domain = sdom.dump(),
                temporal_domain = tdom.dump(),
                )

        stream_def_id = stream_def_id or self.create_stream_definition('%s stream def' % name, 
                parameter_dictionary_id=pdict_id or self.RR2.find_resource_by_name(RT.ParameterDictionary,
                    param_dict_name, id_only=True))

        data_product_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
        return data_product_id
Example #22
    def test_create_dataset(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        # Yield to other greenlets, had an issue with connectivity
        gevent.sleep(1)

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        breakpoint(locals(), globals())
Example #23
    def test_fdt_created_during(self):
        # generate a data product and check that the FDT exists
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(
            1)  # Yield to other greenlets, had an issue with connectivity

        print "--------------------------------"
        print dataset_id
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        print coverage_path
        print "--------------------------------"
Example #24
    def setup_resource(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)

        # Yield to other greenlets, had an issue with connectivity
        gevent.sleep(1)

        self.offering_id = dataset_id
Example #25
    def test_add_parameter_to_data_product(self):
        #self.preload_ui()
        self.test_add_parameter_function()
        data_product_id = self.data_product_id
        stream_def_id = self.resource_registry.find_objects(
            data_product_id, PRED.hasStreamDefinition, id_only=True)[0][0]
        pdict_id = self.resource_registry.find_objects(
            stream_def_id, PRED.hasParameterDictionary, id_only=True)[0][0]
        # Create a new data product that represents the L1 temp from the ctd simulator
        dp = DataProduct(name='CTD Simulator TEMPWAT L1',
                         category=DataProductTypeEnum.DERIVED)
        stream_def_id = self.pubsub_management.create_stream_definition(
            name='tempwat_l1',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        dp_id = self.data_product_management.create_data_product(
            dp,
            stream_definition_id=stream_def_id,
            parent_data_product_id=data_product_id)

        parameter_function = ParameterFunction(name='linear_corr',
                                               function_type=PFT.NUMEXPR,
                                               function='a * x + b',
                                               args=['x', 'a', 'b'])
        pf_id = self.dataset_management.create_parameter_function(
            parameter_function)

        dpd = DataProcessDefinition(name='linear_corr',
                                    description='Linear Correction')
        self.data_process_management.create_data_process_definition(dpd, pf_id)

        parameter = ParameterContext(name='temperature_corrected',
                                     parameter_type='function',
                                     parameter_function_id=pf_id,
                                     parameter_function_map={
                                         'x': 'temp',
                                         'a': 1.03,
                                         'b': 0.25
                                     },
                                     value_encoding='float32',
                                     units='deg_C',
                                     display_name='Temperature Corrected')
        p_id = self.dataset_management.create_parameter(parameter)

        # Add it to the parent or parsed data product
        self.data_product_management.add_parameter_to_data_product(
            p_id, data_product_id)

        # Then update the child's stream definition to include it
        stream_def = self.pubsub_management.read_stream_definition(
            stream_def_id)
        stream_def.available_fields.append('temperature_corrected')
        self.resource_registry.update(stream_def)

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            data_product_id)

        # For some reason, it takes numerous seconds of yielding with gevent for the coverage to actually save...
        gevent.sleep(10)

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(
            rdt['temperature_corrected'],
            np.arange(30, dtype=np.float32) * 1.03 + 0.25,
            decimal=5)
    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition(
            'lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(
            data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = [
            'offset_document'
        ]
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc, _ = self.rrclient.create_association(
            subject=data_product_id,
            object=data_producer_id,
            predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)

        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a': 2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

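        # Publish the granule onto the data product's stream and wait for it to be ingested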
        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                   predicate=PRED.hasStream,
                                                   id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0]))

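        # Update the lookup document and notify the data product via an ExternalReferencesUpdatedEvent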
        svm.stored_value_cas('updated_document', {'offset_a': 3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id,
                         reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2)  # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0, 23.0]))
    def test_pydap(self):
        if not CFG.get_safe('bootstrap.use_pydap', False):
            raise unittest.SkipTest('PyDAP is off (bootstrap.use_pydap)')
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 10)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(
            1)  # Yield to other greenlets, had an issue with connectivity

        pydap_host = CFG.get_safe('server.pydap.host', 'localhost')
        pydap_port = CFG.get_safe('server.pydap.port', 8001)
        url = 'http://%s:%s/%s' % (pydap_host, pydap_port, dataset_id)

        for i in xrange(
                3
        ):  # Do it three times to test that the cache doesn't corrupt the requests/responses
            ds = open_url(url)
            np.testing.assert_array_equal(ds['time'][:], np.arange(10))
            untested = []
            for k, v in rdt.iteritems():
                if k == rdt.temporal_parameter:
                    continue
                context = rdt.context(k)
                if isinstance(context.param_type, QuantityType):
                    np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
                elif isinstance(context.param_type, ArrayType):
                    if context.param_type.inner_encoding is None:
                        values = np.empty(rdt[k].shape, dtype='O')
                        for i, obj in enumerate(rdt[k]):
                            values[i] = str(obj)
                        np.testing.assert_array_equal(ds[k][k][:][0], values)
                    elif len(rdt[k].shape) > 1:
                        values = np.empty(rdt[k].shape[0], dtype='O')
                        for i in xrange(rdt[k].shape[0]):
                            values[i] = ','.join(
                                map(lambda x: str(x), rdt[k][i].tolist()))
                elif isinstance(context.param_type, ConstantType):
                    np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
                elif isinstance(context.param_type, CategoryType):
                    np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
                else:
                    untested.append('%s (%s)' % (k, context.param_type))
            if untested:
                raise AssertionError('Untested parameters: %s' % untested)
Example #28
    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition,
                        ctd_stream_def_id)

        dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(
            dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT',
                                 category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted with
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

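        # Publish a granule to the parent data product and wait for the dataset to be modified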
        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id,
                                                            PRED.hasDataset,
                                                            id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time', 'temp']))
Example #29
    def _setup_resources(self):
        # TODO: some or all of this (or some variation) should move to DAMS

        # Build the test resources for the dataset
        dams_cli = DataAcquisitionManagementServiceClient()
        dpms_cli = DataProductManagementServiceClient()
        rr_cli = ResourceRegistryServiceClient()

        eda = ExternalDatasetAgent()
        eda_id = dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance()
        eda_inst_id = dams_cli.create_external_dataset_agent_instance(eda_inst, external_dataset_agent_id=eda_id)

        # Create and register the necessary resources/objects

        # Create DataProvider
        dprov = ExternalDataProvider(institution=Institution(), contact=ContactInformation())
        dprov.contact.name = 'Christopher Mueller'
        dprov.contact.email = '*****@*****.**'

        # Create DataSource
        dsrc = DataSource(protocol_type='FILE', institution=Institution(), contact=ContactInformation())
        dsrc.connection_params['base_data_url'] = ''
        dsrc.contact.name='Tim Giguere'
        dsrc.contact.email = '*****@*****.**'

        # Create ExternalDataset
        ds_name = 'slocum_test_dataset'
        dset = ExternalDataset(name=ds_name, dataset_description=DatasetDescription(), update_description=UpdateDescription(), contact=ContactInformation())

        dset.dataset_description.parameters['dataset_path'] = 'test_data/ru05-2012-021-0-0-sbd.dat'
        dset.dataset_description.parameters['temporal_dimension'] = None
        dset.dataset_description.parameters['zonal_dimension'] = None
        dset.dataset_description.parameters['meridional_dimension'] = None
        dset.dataset_description.parameters['vertical_dimension'] = None
        dset.dataset_description.parameters['variables'] = [
            'c_wpt_y_lmc',
            'sci_water_cond',
            'm_y_lmc',
            'u_hd_fin_ap_inflection_holdoff',
            'sci_m_present_time',
            'm_leakdetect_voltage_forward',
            'sci_bb3slo_b660_scaled',
            'c_science_send_all',
            'm_gps_status',
            'm_water_vx',
            'm_water_vy',
            'c_heading',
            'sci_fl3slo_chlor_units',
            'u_hd_fin_ap_gain',
            'm_vacuum',
            'u_min_water_depth',
            'm_gps_lat',
            'm_veh_temp',
            'f_fin_offset',
            'u_hd_fin_ap_hardover_holdoff',
            'c_alt_time',
            'm_present_time',
            'm_heading',
            'sci_bb3slo_b532_scaled',
            'sci_fl3slo_cdom_units',
            'm_fin',
            'x_cycle_overrun_in_ms',
            'sci_water_pressure',
            'u_hd_fin_ap_igain',
            'sci_fl3slo_phyco_units',
            'm_battpos',
            'sci_bb3slo_b470_scaled',
            'm_lat',
            'm_gps_lon',
            'sci_ctd41cp_timestamp',
            'm_pressure',
            'c_wpt_x_lmc',
            'c_ballast_pumped',
            'x_lmc_xy_source',
            'm_lon',
            'm_avg_speed',
            'sci_water_temp',
            'u_pitch_ap_gain',
            'm_roll',
            'm_tot_num_inflections',
            'm_x_lmc',
            'u_pitch_ap_deadband',
            'm_final_water_vy',
            'm_final_water_vx',
            'm_water_depth',
            'm_leakdetect_voltage',
            'u_pitch_max_delta_battpos',
            'm_coulomb_amphr',
            'm_pitch',
        ]

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name='slocum_model')
        dsrc_model.model = 'SLOCUM'
        dsrc_model.data_handler_module = 'N/A'
        dsrc_model.data_handler_class = 'N/A'

        ## Run everything through DAMS
        ds_id = dams_cli.create_external_dataset(external_dataset=dset)
        ext_dprov_id = dams_cli.create_external_data_provider(external_data_provider=dprov)
        ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = dams_cli.register_external_data_set(external_dataset_id=ds_id)

        # Or using each method
        dams_cli.assign_data_source_to_external_data_provider(data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        dams_cli.assign_data_source_to_data_model(data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        dams_cli.assign_external_dataset_to_data_source(external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        dams_cli.assign_external_dataset_to_agent_instance(external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
        #        dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=self.eda_id, agent_instance_id=self.eda_inst_id)

        # Generate the data product and associate it to the ExternalDataset
        dprod = DataProduct(name='slocum_parsed_product', description='parsed slocum product')
        dproduct_id = dpms_cli.create_data_product(data_product=dprod)

        dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id, create_stream=True)

        stream_id, assn = rr_cli.find_objects(subject=dproduct_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        stream_id = stream_id[0]

        log.info('Created resources: {0}'.format({'ExternalDataset':ds_id, 'ExternalDataProvider':ext_dprov_id, 'DataSource':ext_dsrc_id, 'DataSourceModel':ext_dsrc_model_id, 'DataProducer':dproducer_id, 'DataProduct':dproduct_id, 'Stream':stream_id}))

        #CBM: Use CF standard_names

        ttool = TaxyTool()

        ttool.add_taxonomy_set('c_wpt_y_lmc')
        ttool.add_taxonomy_set('sci_water_cond')
        ttool.add_taxonomy_set('m_y_lmc')
        ttool.add_taxonomy_set('u_hd_fin_ap_inflection_holdoff')
        ttool.add_taxonomy_set('sci_m_present_time')
        ttool.add_taxonomy_set('m_leakdetect_voltage_forward')
        ttool.add_taxonomy_set('sci_bb3slo_b660_scaled')
        ttool.add_taxonomy_set('c_science_send_all')
        ttool.add_taxonomy_set('m_gps_status')
        ttool.add_taxonomy_set('m_water_vx')
        ttool.add_taxonomy_set('m_water_vy')
        ttool.add_taxonomy_set('c_heading')
        ttool.add_taxonomy_set('sci_fl3slo_chlor_units')
        ttool.add_taxonomy_set('u_hd_fin_ap_gain')
        ttool.add_taxonomy_set('m_vacuum')
        ttool.add_taxonomy_set('u_min_water_depth')
        ttool.add_taxonomy_set('m_gps_lat')
        ttool.add_taxonomy_set('m_veh_temp')
        ttool.add_taxonomy_set('f_fin_offset')
        ttool.add_taxonomy_set('u_hd_fin_ap_hardover_holdoff')
        ttool.add_taxonomy_set('c_alt_time')
        ttool.add_taxonomy_set('m_present_time')
        ttool.add_taxonomy_set('m_heading')
        ttool.add_taxonomy_set('sci_bb3slo_b532_scaled')
        ttool.add_taxonomy_set('sci_fl3slo_cdom_units')
        ttool.add_taxonomy_set('m_fin')
        ttool.add_taxonomy_set('x_cycle_overrun_in_ms')
        ttool.add_taxonomy_set('sci_water_pressure')
        ttool.add_taxonomy_set('u_hd_fin_ap_igain')
        ttool.add_taxonomy_set('sci_fl3slo_phyco_units')
        ttool.add_taxonomy_set('m_battpos')
        ttool.add_taxonomy_set('sci_bb3slo_b470_scaled')
        ttool.add_taxonomy_set('m_lat')
        ttool.add_taxonomy_set('m_gps_lon')
        ttool.add_taxonomy_set('sci_ctd41cp_timestamp')
        ttool.add_taxonomy_set('m_pressure')
        ttool.add_taxonomy_set('c_wpt_x_lmc')
        ttool.add_taxonomy_set('c_ballast_pumped')
        ttool.add_taxonomy_set('x_lmc_xy_source')
        ttool.add_taxonomy_set('m_lon')
        ttool.add_taxonomy_set('m_avg_speed')
        ttool.add_taxonomy_set('sci_water_temp')
        ttool.add_taxonomy_set('u_pitch_ap_gain')
        ttool.add_taxonomy_set('m_roll')
        ttool.add_taxonomy_set('m_tot_num_inflections')
        ttool.add_taxonomy_set('m_x_lmc')
        ttool.add_taxonomy_set('u_pitch_ap_deadband')
        ttool.add_taxonomy_set('m_final_water_vy')
        ttool.add_taxonomy_set('m_final_water_vx')
        ttool.add_taxonomy_set('m_water_depth')
        ttool.add_taxonomy_set('m_leakdetect_voltage')
        ttool.add_taxonomy_set('u_pitch_max_delta_battpos')
        ttool.add_taxonomy_set('m_coulomb_amphr')
        ttool.add_taxonomy_set('m_pitch')

        #CBM: Eventually, probably want to group this crap somehow - not sure how yet...

        # Create the logger for receiving publications
        self.create_stream_and_logger(name='slocum', stream_id=stream_id)

        self.EDA_RESOURCE_ID = ds_id
        self.EDA_NAME = ds_name
        self.DVR_CONFIG['dh_cfg'] = {
            'TESTING':True,
            'stream_id':stream_id,
            'external_dataset_res':dset,
            'taxonomy':ttool.dump(),
            'data_producer_id':dproducer_id,#CBM: Should this be put in the main body of the config - with mod & cls?
            'max_records':20,
        }
    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc,_ = self.rrclient.create_association(subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

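        # list_qc_references should surface the qc_keys configured on the associated DataProducer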
        document_keys = self.damsclient.list_qc_references(data_product_id)
            
        self.assertEquals(document_keys, ['offset_document'])
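        # Seed the stored value store with the lookup document referenced by the qc key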
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a':2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        dataset_id = dataset_ids[0]

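        # Watch the dataset so the test can block until the published granule has been ingested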
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

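        # Publish one record; with offset_a=2.0 the 'calibrated' value should come back as 20 + 2 = 22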
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))


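        # Store an updated lookup document and announce it; records published afterwards should use offset_a=3.0 (20 + 3 = 23)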
        svm.stored_value_cas('updated_document', {'offset_a':3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2) # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'],np.array([20.,20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0,23.0]))
Exemple #31
    def test_get_data_product_provenance_report(self):
        #Create a test device
        device_obj = Device(name='Device1', description='test instrument site')
        device_id, _ = self.rrclient.create(device_obj)
        self.addCleanup(self.rrclient.delete, device_id)

        #Create a test DataProduct
        data_product1_obj = DataProduct(name='DataProduct1',
                                        description='test data product 1')
        data_product1_id, _ = self.rrclient.create(data_product1_obj)
        self.addCleanup(self.rrclient.delete, data_product1_id)

        #Create a test DataProcess
        data_process_obj = DataProcess(name='DataProcess',
                                       description='test data process')
        data_process_id, _ = self.rrclient.create(data_process_obj)
        self.addCleanup(self.rrclient.delete, data_process_id)

        #Create a second test DataProduct
        data_product2_obj = DataProduct(name='DataProduct2',
                                        description='test data product 2')
        data_product2_id, _ = self.rrclient.create(data_product2_obj)
        self.addCleanup(self.rrclient.delete, data_product2_id)

        #Create a test DataProducer
        data_producer_obj = DataProducer(name='DataProducer',
                                         description='test data producer')
        data_producer_id, rev = self.rrclient.create(data_producer_obj)

        #Link the DataProcess to the second DataProduct manually
        assoc_id, _ = self.rrclient.create_association(
            subject=data_process_id,
            predicate=PRED.hasInputProduct,
            object=data_product2_id)
        self.addCleanup(self.rrclient.delete_association, assoc_id)

        # Register the instrument and process. This links the device and the data process
        # with their own producers
        self.damsclient.register_instrument(device_id)
        self.addCleanup(self.damsclient.unregister_instrument, device_id)
        self.damsclient.register_process(data_process_id)
        self.addCleanup(self.damsclient.unregister_process, data_process_id)

        #Manually link the first DataProduct with the test DataProducer
        assoc_id, _ = self.rrclient.create_association(
            subject=data_product1_id,
            predicate=PRED.hasDataProducer,
            object=data_producer_id)

        #Get the DataProducer linked to the DataProcess (created in register_process above)
        #Associate that with with DataProduct1's DataProducer
        data_process_producer_ids, _ = self.rrclient.find_objects(
            subject=data_process_id,
            predicate=PRED.hasDataProducer,
            object_type=RT.DataProducer,
            id_only=True)
        assoc_id, _ = self.rrclient.create_association(
            subject=data_process_producer_ids[0],
            predicate=PRED.hasParent,
            object=data_producer_id)
        self.addCleanup(self.rrclient.delete_association, assoc_id)

        #Get the DataProducer linked to the Device (created in register_instrument
        #Associate that with the DataProcess's DataProducer
        device_producer_ids, _ = self.rrclient.find_objects(
            subject=device_id,
            predicate=PRED.hasDataProducer,
            object_type=RT.DataProducer,
            id_only=True)
        assoc_id, _ = self.rrclient.create_association(
            subject=data_producer_id,
            predicate=PRED.hasParent,
            object=device_producer_ids[0])

        #Create the links between the Device, DataProducts, DataProcess, and all DataProducers
        self.damsclient.assign_data_product(input_resource_id=device_id,
                                            data_product_id=data_product1_id)
        self.addCleanup(self.damsclient.unassign_data_product, device_id,
                        data_product1_id)
        self.damsclient.assign_data_product(input_resource_id=data_process_id,
                                            data_product_id=data_product2_id)
        self.addCleanup(self.damsclient.unassign_data_product, data_process_id,
                        data_product2_id)

        #Traverse through the relationships to get the links between objects
        res = self.dpmsclient.get_data_product_provenance_report(
            data_product2_id)

        #Make sure there are four keys
        self.assertEqual(len(res.keys()), 4)

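        # Count how many provenance entries carry a 'parent' link and how many carry a 'config' section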
        parent_count = 0
        config_count = 0
        for v in res.itervalues():
            if 'parent' in v:
                parent_count += 1
            if 'config' in v:
                config_count += 1

        #Make sure there are three parents and four configs
        self.assertEqual(parent_count, 3)
        self.assertEqual(config_count, 4)
    def _setup_resources(self):
        # TODO: some or all of this (or some variation) should move to DAMS

        # Build the test resources for the dataset
        dams_cli = DataAcquisitionManagementServiceClient()
        dpms_cli = DataProductManagementServiceClient()
        rr_cli = ResourceRegistryServiceClient()

        eda = ExternalDatasetAgent()
        eda_id = dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance()
        eda_inst_id = dams_cli.create_external_dataset_agent_instance(
            eda_inst, external_dataset_agent_id=eda_id)

        # Create and register the necessary resources/objects

        # Create DataProvider
        dprov = ExternalDataProvider(institution=Institution(),
                                     contact=ContactInformation())
        dprov.contact.name = 'Christopher Mueller'
        dprov.contact.email = '*****@*****.**'

        # Create DataSource
        dsrc = DataSource(protocol_type='DAP',
                          institution=Institution(),
                          contact=ContactInformation())
        dsrc.connection_params['base_data_url'] = ''
        dsrc.contact.name = 'Tim Giguere'
        dsrc.contact.email = '*****@*****.**'

        # Create ExternalDataset
        ds_name = 'usgs_test_dataset'
        dset = ExternalDataset(name=ds_name,
                               dataset_description=DatasetDescription(),
                               update_description=UpdateDescription(),
                               contact=ContactInformation())

        # The usgs.nc test dataset is a download of the R1 dataset found here:
        # http://thredds-test.oceanobservatories.org/thredds/dodsC/ooiciData/E66B1A74-A684-454A-9ADE-8388C2C634E5.ncml
        dset.dataset_description.parameters[
            'dataset_path'] = 'test_data/usgs.nc'
        dset.dataset_description.parameters['temporal_dimension'] = 'time'
        dset.dataset_description.parameters['zonal_dimension'] = 'lon'
        dset.dataset_description.parameters['meridional_dimension'] = 'lat'
        dset.dataset_description.parameters['vertical_dimension'] = 'z'
        dset.dataset_description.parameters['variables'] = [
            'water_temperature',
            'streamflow',
            'water_temperature_bottom',
            'water_temperature_middle',
            'specific_conductance',
            'data_qualifier',
        ]

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name='dap_model')
        dsrc_model.model = 'DAP'
        dsrc_model.data_handler_module = 'N/A'
        dsrc_model.data_handler_class = 'N/A'

        ## Run everything through DAMS
        ds_id = dams_cli.create_external_dataset(external_dataset=dset)
        ext_dprov_id = dams_cli.create_external_data_provider(
            external_data_provider=dprov)
        ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = dams_cli.register_external_data_set(
            external_dataset_id=ds_id)

        # Or using each method
        dams_cli.assign_data_source_to_external_data_provider(
            data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        dams_cli.assign_data_source_to_data_model(
            data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        dams_cli.assign_external_dataset_to_data_source(
            external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        dams_cli.assign_external_dataset_to_agent_instance(
            external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
        #        dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=self.eda_id, agent_instance_id=self.eda_inst_id)

        # Generate the data product and associate it to the ExternalDataset
        dprod = DataProduct(name='usgs_parsed_product',
                            description='parsed usgs product')
        dproduct_id = dpms_cli.create_data_product(data_product=dprod)

        dams_cli.assign_data_product(input_resource_id=ds_id,
                                     data_product_id=dproduct_id,
                                     create_stream=True)

        stream_id, assn = rr_cli.find_objects(subject=dproduct_id,
                                              predicate=PRED.hasStream,
                                              object_type=RT.Stream,
                                              id_only=True)
        stream_id = stream_id[0]

        log.info('Created resources: {0}'.format({
            'ExternalDataset': ds_id,
            'ExternalDataProvider': ext_dprov_id,
            'DataSource': ext_dsrc_id,
            'DataSourceModel': ext_dsrc_model_id,
            'DataProducer': dproducer_id,
            'DataProduct': dproduct_id,
            'Stream': stream_id
        }))

        #CBM: Use CF standard_names

        ttool = TaxyTool()
        ttool.add_taxonomy_set('time', 'time')
        ttool.add_taxonomy_set('lon', 'longitude')
        ttool.add_taxonomy_set('lat', 'latitude')
        ttool.add_taxonomy_set('z', 'water depth')
        ttool.add_taxonomy_set('water_temperature',
                               'average water temperature')
        ttool.add_taxonomy_set('water_temperature_bottom',
                               'water temperature at bottom of water column')
        ttool.add_taxonomy_set('water_temperature_middle',
                               'water temperature at middle of water column')
        ttool.add_taxonomy_set('streamflow', 'flow velocity of stream')
        ttool.add_taxonomy_set('specific_conductance',
                               'specific conductance of water')
        ttool.add_taxonomy_set('data_qualifier', 'data qualifier flag')

        ttool.add_taxonomy_set('coords',
                               'This group contains coordinate parameters')
        ttool.add_taxonomy_set('data', 'This group contains data parameters')

        # Create the logger for receiving publications
        self.create_stream_and_logger(name='usgs', stream_id=stream_id)

        self.EDA_RESOURCE_ID = ds_id
        self.EDA_NAME = ds_name
        self.DVR_CONFIG['dh_cfg'] = {
            'TESTING': True,
            'stream_id': stream_id,
            'taxonomy': ttool.dump(),
            'data_producer_id': dproducer_id,  #CBM: Should this be put in the main body of the config - with mod & cls?
            'max_records': 4,
        }
    def _setup_ncom(self):
        # TODO: some or all of this (or some variation) should move to DAMS

        # Create and register the necessary resources/objects

        eda = ExternalDatasetAgent()
        eda_id = self.dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance()
        eda_inst_id = self.dams_cli.create_external_dataset_agent_instance(
            eda_inst, external_dataset_agent_id=eda_id)

        # Create DataProvider
        dprov = ExternalDataProvider(institution=Institution(),
                                     contact=ContactInformation())
        #        dprov.institution.name = "OOI CGSN"
        dprov.contact.name = "Robert Weller"
        dprov.contact.email = "*****@*****.**"

        # Create DataSource
        dsrc = DataSource(protocol_type="DAP",
                          institution=Institution(),
                          contact=ContactInformation())
        #        dsrc.connection_params["base_data_url"] = "http://ooi.whoi.edu/thredds/dodsC/"
        dsrc.connection_params["base_data_url"] = ""
        dsrc.contact.name = "Rich Signell"
        dsrc.contact.email = "*****@*****.**"

        # Create ExternalDataset
        dset = ExternalDataset(name="test",
                               dataset_description=DatasetDescription(),
                               update_description=UpdateDescription(),
                               contact=ContactInformation())

        #        dset.dataset_description.parameters["dataset_path"] = "ooi/AS02CPSM_R_M.nc"
        dset.dataset_description.parameters[
            "dataset_path"] = "test_data/ncom.nc"
        dset.dataset_description.parameters["temporal_dimension"] = "time"
        dset.dataset_description.parameters["zonal_dimension"] = "lon"
        dset.dataset_description.parameters["meridional_dimension"] = "lat"

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name="dap_model")
        dsrc_model.model = "DAP"
        dsrc_model.data_handler_module = "eoi.agent.handler.dap_external_data_handler"
        dsrc_model.data_handler_class = "DapExternalDataHandler"

        ## Run everything through DAMS
        ds_id = self.ncom_ds_id = self.dams_cli.create_external_dataset(
            external_dataset=dset)
        ext_dprov_id = self.dams_cli.create_external_data_provider(
            external_data_provider=dprov)
        ext_dsrc_id = self.dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = self.dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = self.dams_cli.register_external_data_set(
            external_dataset_id=ds_id)

        ## Associate everything
        # Convenience method
        #        self.dams_cli.assign_eoi_resources(external_data_provider_id=ext_dprov_id, data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id, external_dataset_id=ds_id, external_data_agent_id=eda_id, agent_instance_id=eda_inst_id)

        # Or using each method
        self.dams_cli.assign_data_source_to_external_data_provider(
            data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        self.dams_cli.assign_data_source_to_data_model(
            data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        self.dams_cli.assign_external_dataset_to_data_source(
            external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        self.dams_cli.assign_external_dataset_to_agent_instance(
            external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
        #        self.dams_cli.assign_external_dataset_agent_to_data_model(external_data_agent_id=eda_id, data_source_model_id=ext_dsrc_model_id)
        #        self.dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=eda_id, agent_instance_id=eda_inst_id)

        # Generate the data product and associate it to the ExternalDataset
        dprod = DataProduct(name='ncom_product',
                            description='raw ncom product')
        dproduct_id = self.dpms_cli.create_data_product(data_product=dprod)

        self.dams_cli.assign_data_product(input_resource_id=ds_id,
                                          data_product_id=dproduct_id,
                                          create_stream=True)
    def test_pydap(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition("example", parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name="example")
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

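        # Persistence creates a Dataset associated to the data product; monitor it for incoming granules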
        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

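        # Publish ten records through the data product's stream and wait until they are persisted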
        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 10)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.wait())

        gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

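        # The persisted dataset is served over PyDAP; the DAP URL is keyed by the data product id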
        pydap_host = CFG.get_safe("server.pydap.host", "localhost")
        pydap_port = CFG.get_safe("server.pydap.port", 8001)
        url = "http://%s:%s/%s" % (pydap_host, pydap_port, data_product_id)

        for i in xrange(3):  # Do it three times to test that the cache doesn't corrupt the requests/responses
            ds = open_url(url)

            np.testing.assert_array_equal(list(ds["data"]["time"]), np.arange(10))
            untested = []
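            # Compare each parameter against the DAP response according to its parameter type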
            for k, v in rdt.iteritems():
                if k == rdt.temporal_parameter:
                    continue
                context = rdt.context(k)
                if isinstance(context.param_type, QuantityType):
                    np.testing.assert_array_equal(list(ds["data"][k]), rdt[k])
                elif isinstance(context.param_type, ArrayType):
                    if context.param_type.inner_encoding is None:
                        # Object arrays are expected back as their string representations
                        values = np.empty(rdt[k].shape, dtype="O")
                        for j, obj in enumerate(rdt[k]):
                            values[j] = str(obj)
                        np.testing.assert_array_equal(list(ds["data"][k]), values)
                    elif len(rdt[k].shape) > 1:
                        # Multidimensional arrays are expected back as one comma-separated string per record
                        values = np.empty(rdt[k].shape[0], dtype="O")
                        for j in xrange(rdt[k].shape[0]):
                            values[j] = ",".join(map(lambda x: str(x), rdt[k][j].tolist()))
                        np.testing.assert_array_equal(list(ds["data"][k]), values)
                elif isinstance(context.param_type, ConstantType):
                    np.testing.assert_array_equal(list(ds["data"][k]), rdt[k])
                elif isinstance(context.param_type, CategoryType):
                    np.testing.assert_array_equal(list(ds["data"][k]), rdt[k])
                else:
                    untested.append("%s (%s)" % (k, context.param_type))
            if untested:
                raise AssertionError("Untested parameters: %s" % untested)
    def _setup_hfr(self):
        # TODO: some or all of this (or some variation) should move to DAMS

        # Create and register the necessary resources/objects

        eda = ExternalDatasetAgent()
        eda_id = self.dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance()
        eda_inst_id = self.dams_cli.create_external_dataset_agent_instance(
            eda_inst, external_dataset_agent_id=eda_id)

        # Create DataProvider
        dprov = ExternalDataProvider(institution=Institution(),
                                     contact=ContactInformation())
        #        dprov.institution.name = "HFR UCSD"

        # Create DataSource
        dsrc = DataSource(protocol_type="DAP",
                          institution=Institution(),
                          contact=ContactInformation())
        dsrc.connection_params[
            "base_data_url"] = "http://hfrnet.ucsd.edu:8080/thredds/dodsC/"

        # Create ExternalDataset
        dset = ExternalDataset(name="UCSD HFR",
                               dataset_description=DatasetDescription(),
                               update_description=UpdateDescription(),
                               contact=ContactInformation())
        dset.dataset_description.parameters[
            "dataset_path"] = "HFRNet/USEGC/6km/hourly/RTV"
        #        dset.dataset_description.parameters["dataset_path"] = "test_data/hfr.nc"
        dset.dataset_description.parameters["temporal_dimension"] = "time"
        dset.dataset_description.parameters["zonal_dimension"] = "lon"
        dset.dataset_description.parameters["meridional_dimension"] = "lat"

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name="dap_model")
        dsrc_model.model = "DAP"
        dsrc_model.data_handler_module = "eoi.agent.handler.dap_external_data_handler"
        dsrc_model.data_handler_class = "DapExternalDataHandler"

        ## Run everything through DAMS
        ds_id = self.hfr_ds_id = self.dams_cli.create_external_dataset(
            external_dataset=dset)
        ext_dprov_id = self.dams_cli.create_external_data_provider(
            external_data_provider=dprov)
        ext_dsrc_id = self.dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = self.dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = self.dams_cli.register_external_data_set(
            external_dataset_id=ds_id)

        ## Associate everything
        # Convenience method
        #        self.dams_cli.assign_eoi_resources(external_data_provider_id=ext_dprov_id, data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id, external_dataset_id=ds_id, external_data_agent_id=eda_id, agent_instance_id=eda_inst_id)

        # Or using each method
        self.dams_cli.assign_data_source_to_external_data_provider(
            data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        self.dams_cli.assign_data_source_to_data_model(
            data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        self.dams_cli.assign_external_dataset_to_data_source(
            external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        self.dams_cli.assign_external_dataset_to_agent_instance(
            external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
        #        self.dams_cli.assign_external_dataset_agent_to_data_model(external_data_agent_id=eda_id, data_source_model_id=ext_dsrc_model_id)
        #        self.dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=eda_id, agent_instance_id=eda_inst_id)

        # Generate the data product and associate it to the ExternalDataset
        dprod = DataProduct(name='hfr_product', description='raw hfr product')
        dproduct_id = self.dpms_cli.create_data_product(data_product=dprod)

        self.dams_cli.assign_data_product(input_resource_id=ds_id,
                                          data_product_id=dproduct_id,
                                          create_stream=True)