コード例 #1
0
class RecordDictionaryIntegrationTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management  = PubsubManagementServiceClient()

        self.rdt                      = None
        self.data_producer_id         = None
        self.provider_metadata_update = None
        self.event                    = Event()

    def verify_incoming(self, m,r,s):
        rdt = RecordDictionaryTool.load_from_granule(m)
        for k,v in rdt.iteritems():
            np.testing.assert_array_equal(v, self.rdt[k])
        self.assertEquals(m.data_producer_id, self.data_producer_id)
        self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
        self.assertNotEqual(m.creation_timestamp, None)
        self.event.set()


    def test_serialize_compatability(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()
        def verifier(msg, route, stream_id):
            for k,v in msg.record_dictionary.iteritems():
                if v is not None:
                    self.assertIsInstance(v, np.ndarray)
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for k,v in rdt.iteritems():
                self.assertIsInstance(rdt[k], np.ndarray)
                self.assertIsInstance(v, np.ndarray)
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id,route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt,10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(60))


    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))


        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)


        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))

        
    def test_filter(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields,['time','temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k,v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
        


    def test_rdt_param_funcs(self):
        param_funcs = {
            'identity' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.interpolation',
                'function' : 'identity',
                'args':['x']
            },
            'ctd_tempwat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_tempwat',
                'args' : ['t0']
            },
            'ctd_preswat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_preswat',
                'args' : ["p0", "p_range_psia"]
            },
            'ctd_condwat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_condwat',
                'args' : ['c0']
            },
            'ctd_pracsal' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_pracsal',
                'args' : ['c', 't', 'p']
            },
            'ctd_density' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_density',
                'args' : ['SP','t','p','lat','lon']
            }
        }

        pfunc_ids = {}
        for name, param_def in param_funcs.iteritems():
            paramfunc = ParameterFunction(name, **param_def)
            pf_id = self.dataset_management.create_parameter_function(paramfunc)
            pfunc_ids[name] = pf_id


        params = {
            'time' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float64',
                'units' : 'seconds since 1900-01-01'
            },
            'temperature_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'pressure_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'conductivity_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'temperature' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_tempwat'],
                'parameter_function_map' : { 't0' : 'temperature_counts'},
                'value_encoding' : 'float32',
                'units' : 'deg_C'
            },
            'pressure' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_preswat'],
                'parameter_function_map' : {'p0' : 'pressure_counts', 'p_range_psia' : 679.34040721},
                'value_encoding' : 'float32',
                'units' : 'dbar'
            },
            'conductivity' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_condwat'],
                'parameter_function_map' : {'c0' : 'conductivity_counts'},
                'value_encoding' : 'float32',
                'units' : 'Sm-1'
            },
            'salinity' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_pracsal'],
                'parameter_function_map' : {'c' : 'conductivity', 't' : 'temperature', 'p' : 'pressure'},
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'density' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_density'],
                'parameter_function_map' : {
                    'SP' : 'salinity',
                    't' : 'temperature',
                    'p' : 'pressure',
                    'lat' : 'lat',
                    'lon' : 'lon'
                },
                'value_encoding' : 'float32',
                'units' : 'kg m-1'
            },
            'lat' : {
                'parameter_type' : 'sparse',
                'value_encoding' : 'float32',
                'units' : 'degrees_north'
            },
            'lon' : {
                'parameter_type' : 'sparse',
                'value_encoding' : 'float32',
                'units' : 'degrees_east'
            }
        }
        param_dict = {}
        for name, param in params.iteritems():
            pcontext = ParameterContext(name, **param)
            param_id = self.dataset_management.create_parameter(pcontext)
            param_dict[name] = param_id
            
        pdict_id = self.dataset_management.create_parameter_dictionary('ctd_test', param_dict.values(), 'time')
        stream_def_id = self.pubsub_management.create_stream_definition('ctd_test', parameter_dictionary_id=pdict_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temperature_counts'] = [280000]
        rdt['conductivity_counts'] = [100000]
        rdt['pressure_counts'] = [2789]

        rdt['lat'] = [45]
        rdt['lon'] = [-71]

        np.testing.assert_allclose(rdt['density'], np.array([1001.00543606]))

    def test_rdt_lookup(self):
        rdt = self.create_lookup_rdt()

        self.assertTrue('offset_a' in rdt.lookup_values())
        self.assertFalse('offset_b' in rdt.lookup_values())

        rdt['time'] = [0]
        rdt['temp'] = [10.0]
        rdt['offset_a'] = [2.0]
        self.assertEquals(rdt['offset_b'], None)
        self.assertEquals(rdt.lookup_values(), ['offset_a'])
        np.testing.assert_array_almost_equal(rdt['calibrated'], np.array([12.0]))

        svm = StoredValueManager(self.container)
        svm.stored_value_cas('coefficient_document', {'offset_b':2.0})
        svm.stored_value_cas("GA03FLMA-RI001-13-CTDMOG999_OFFSETC", {'offset_c':3.0})
        rdt.fetch_lookup_values()
        np.testing.assert_array_equal(rdt['offset_b'], np.array([2.0]))
        np.testing.assert_array_equal(rdt['calibrated_b'], np.array([14.0]))
        np.testing.assert_array_equal(rdt['offset_c'], np.array([3.0]))


    def create_rdt(self):
        contexts, pfuncs = self.create_pfuncs()
        context_ids = list(contexts.itervalues())

        pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='test_TIME')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
        stream_def_id = self.pubsub_management.create_stream_definition('functional', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt

    def create_lookup_rdt(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt


    def create_pfuncs(self):
        
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext(name='TIME', 
                                  parameter_type='quantity',
                                  value_encoding='float64',
                                  units='seconds since 1900-01-01')
        t_ctxt_id = self.dataset_management.create_parameter(t_ctxt)
        contexts['TIME'] = t_ctxt_id

        lat_ctxt = ParameterContext(name='LAT', 
                                    parameter_type="sparse",
                                    value_encoding='float32',
                                    units='degrees_north')
        lat_ctxt_id = self.dataset_management.create_parameter(lat_ctxt)
        contexts['LAT'] = lat_ctxt_id

        lon_ctxt = ParameterContext(name='LON', 
                                    parameter_type='sparse',
                                    value_encoding='float32',
                                    units='degrees_east')
        lon_ctxt_id = self.dataset_management.create_parameter(lon_ctxt)
        contexts['LON'] = lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(name='TEMPWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='deg_C')
        temp_ctxt_id = self.dataset_management.create_parameter(temp_ctxt)
        contexts['TEMPWAT_L0'] = temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(name='CONDWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='S m-1')
        cond_ctxt_id = self.dataset_management.create_parameter(cond_ctxt)
        contexts['CONDWAT_L0'] = cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(name='PRESWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='dbar')
        press_ctxt_id = self.dataset_management.create_parameter(press_ctxt)
        contexts['PRESWAT_L0'] = press_ctxt_id


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = ParameterFunction(name='TEMPWAT_L1',
                function_type=PFT.NUMEXPR,
                function=tl1_func,
                args=['T'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['TEMPWAT_L1'] = expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        tempL1_ctxt = ParameterContext(name='TEMPWAT_L1', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=tl1_pmap,
                value_encoding='float32',
                units='deg_C')
        tempL1_ctxt_id = self.dataset_management.create_parameter(tempL1_ctxt)
        contexts['TEMPWAT_L1'] = tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = ParameterFunction(name='CONDWAT_L1',
                function_type=PFT.NUMEXPR,
                function=cl1_func,
                args=['C'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['CONDWAT_L1'] = expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        condL1_ctxt = ParameterContext(name='CONDWAT_L1', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=cl1_pmap,
                value_encoding='float32',
                units='S m-1')
        condL1_ctxt_id = self.dataset_management.create_parameter(condL1_ctxt)
        contexts['CONDWAT_L1'] = condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = ParameterFunction(name='PRESWAT_L1',function=pl1_func,function_type=PFT.NUMEXPR,args=['P','p_range'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['PRESWAT_L1'] = expr_id
        
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        presL1_ctxt = ParameterContext(name='PRESWAT_L1',
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=pl1_pmap,
                value_encoding='float32',
                units='S m-1')
        presL1_ctxt_id = self.dataset_management.create_parameter(presL1_ctxt)
        contexts['PRESWAT_L1'] = presL1_ctxt_id

        # Density & practical salinity calucluated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = ParameterFunction(name='PRACSAL',function_type=PFT.PYTHON,function=sal_func,owner=owner,args=sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['PRACSAL'] = expr_id
        
        c10_f = ParameterFunction(name='c10', function_type=PFT.NUMEXPR, function='C*10', args=['C'])
        expr_id = self.dataset_management.create_parameter_function(c10_f)
        c10 = ParameterContext(name='c10', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map={'C':'CONDWAT_L1'},
                value_encoding='float32',
                units='1')
        c10_id = self.dataset_management.create_parameter(c10)
        contexts['c10'] = c10_id

        # A magic function that may or may not exist actually forms the line below at runtime.
        sal_pmap = {'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_ctxt = ParameterContext(name='PRACSAL', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=sal_pmap,
                value_encoding='float32',
                units='g kg-1')

        sal_ctxt_id = self.dataset_management.create_parameter(sal_ctxt)
        contexts['PRACSAL'] = sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = CoverageParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name='DENSITY', parameter_context=dens_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, dens_ctxt_id)
        contexts['DENSITY'] = dens_ctxt_id
        return contexts, funcs
コード例 #2
0
class DatasetManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        dataset = Dataset(name='ctd_dataset')
        dataset_id = self.dataset_management.create_dataset(
            dataset, parameter_dictionary_id=pdict_id)

        ds_obj = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, 'ctd_dataset')

        ds_obj.name = 'something different'
        self.dataset_management.update_dataset(ds_obj)
        ds_obj2 = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, ds_obj2.name)

    def test_context_crud(self):
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        ctxt = self.dataset_management.read_parameter_context(context_id)
        context = DatasetManagementService.get_coverage_parameter(ctxt)
        self.assertIsInstance(context, CoverageParameterContext)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)

    def test_pfunc_crud(self):
        contexts, funcs = self.create_pfuncs()
        context_ids = [context_id for context_id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(
            name='functional_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        expr_id = funcs['CONDWAT_L1']
        expr = self.dataset_management.read_parameter_function(expr_id)
        func_class = DatasetManagementService.get_coverage_function(expr)
        self.assertIsInstance(func_class, NumexprFunction)

    def test_pdict_crud(self):
        context_ids = self.create_contexts()
        pdict_res_id = self.dataset_management.create_parameter_dictionary(
            name='pdict1',
            parameter_context_ids=context_ids,
            temporal_context='time')

        pdict_contexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=pdict_res_id, id_only=True)

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_res_id)
        self.assertIsInstance(pdict, ParameterDictionary)
        self.assertTrue('time_test' in pdict)
        self.assertEquals(pdict.identifier, pdict_res_id)

        self.assertEquals(set(pdict_contexts), set(context_ids))

        self.dataset_management.delete_parameter_dictionary(
            parameter_dictionary_id=pdict_res_id)
        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_dictionary(
                parameter_dictionary_id=pdict_res_id)

    def create_contexts(self):
        context_ids = []
        cond = ParameterContext(name='condictivity_test',
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='1',
                                fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(cond))

        pres = ParameterContext(name='pressure_test',
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='Pa',
                                fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(pres))

        sal = ParameterContext(name='salinity_test',
                               parameter_type='quantity',
                               value_encoding='float32',
                               units='psu',
                               fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(sal))

        temp = ParameterContext(name='temp_test',
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='degree_C',
                                fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(temp))

        time_test = ParameterContext(name='time_test',
                                     parameter_type='quantity',
                                     value_encoding='float32',
                                     units='seconds since 1970-01-01',
                                     fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(time_test))

        return context_ids

    def create_pfuncs(self):
        contexts = {}
        funcs = {}

        time_ = ParameterContext(name='TIME',
                                 parameter_type='quantity',
                                 value_encoding='float32',
                                 units='seconds since 1900-01-01',
                                 fill_value=0)

        t_ctxt_id = self.dataset_management.create_parameter(time_)
        contexts['TIME'] = t_ctxt_id

        lat = ParameterContext(name='LAT',
                               parameter_type='sparse',
                               value_encoding='float32',
                               units='degrees_north',
                               fill_value=-9999.)
        lat_ctxt_id = self.dataset_management.create_parameter(lat)
        contexts['LAT'] = lat_ctxt_id

        lon = ParameterContext(name='LON',
                               parameter_type="sparse",
                               value_encoding='float32',
                               units='degrees_east',
                               fill_value=-9999)
        lon_ctxt_id = self.dataset_management.create_parameter(lon)
        contexts['LON'] = lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp = ParameterContext(name='TEMPWAT_L0',
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='deg_C')
        temp_ctxt_id = self.dataset_management.create_parameter(temp)
        contexts['TEMPWAT_L0'] = temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond = ParameterContext(name='CONDWAT_L0',
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='S m-1')
        cond_ctxt_id = self.dataset_management.create_parameter(cond)
        contexts['CONDWAT_L0'] = cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press = ParameterContext(name='PRESWAT_L0',
                                 parameter_type='quantity',
                                 value_encoding='float32',
                                 units='dbar')
        press_ctxt_id = self.dataset_management.create_parameter(press)
        contexts['PRESWAT_L0'] = press_ctxt_id

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        tempwat_f = ParameterFunction(name='TEMPWAT_L1',
                                      function_type=PFT.NUMEXPR,
                                      function=tl1_func,
                                      args=['T'])
        expr_id = self.dataset_management.create_parameter_function(tempwat_f)
        funcs['TEMPWAT_L1'] = expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        tempL1 = ParameterContext(name='TEMPWAT_L1',
                                  parameter_type='function',
                                  parameter_function_id=expr_id,
                                  parameter_function_map=tl1_pmap,
                                  value_encoding='float32',
                                  units='deg_C')
        tempL1_ctxt_id = self.dataset_management.create_parameter(tempL1)
        contexts['TEMPWAT_L1'] = tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        condwat_f = ParameterFunction(name='CONDWAT_L1',
                                      function_type=PFT.NUMEXPR,
                                      function=cl1_func,
                                      args=['C'])
        expr_id = self.dataset_management.create_parameter_function(condwat_f)
        funcs['CONDWAT_L1'] = expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        condL1 = ParameterContext(name='CONDWAT_L1',
                                  parameter_type='function',
                                  parameter_function_id=expr_id,
                                  parameter_function_map=cl1_pmap,
                                  value_encoding='float32',
                                  units='S m-1')
        condL1_ctxt_id = self.dataset_management.create_parameter(condL1)
        contexts['CONDWAT_L1'] = condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        preswat_f = ParameterFunction(name='PRESWAT_L1',
                                      function_type=PFT.NUMEXPR,
                                      function=pl1_func,
                                      args=['P', 'p_range'])
        expr_id = self.dataset_management.create_parameter_function(preswat_f)
        funcs['PRESWAT_L1'] = expr_id

        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        presL1 = ParameterContext(name='PRESWAT_L1',
                                  parameter_type='function',
                                  parameter_function_id=expr_id,
                                  parameter_function_map=pl1_pmap,
                                  value_encoding='float32',
                                  units='dbar')
        presL1_ctxt_id = self.dataset_management.create_parameter(presL1)
        contexts['PRESWAT_L1'] = presL1_ctxt_id

        # A magic function that may or may not exist actually forms the line below at runtime.
        cond_f = ParameterFunction(name='condwat10',
                                   function_type=PFT.NUMEXPR,
                                   function='C*10',
                                   args=['C'])
        expr_id = self.dataset_management.create_parameter_function(cond_f)
        cond10 = ParameterContext(name='c10',
                                  parameter_type='function',
                                  parameter_function_id=expr_id,
                                  parameter_function_map={'C': 'CONDWAT_L1'},
                                  value_encoding='float32',
                                  units='1')
        cond10_id = self.dataset_management.create_parameter(cond10)
        contexts['C10'] = cond10_id

        # Density & practical salinity calucluated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        pracsal_f = ParameterFunction(name='PRACSAL',
                                      function_type=PFT.PYTHON,
                                      owner=owner,
                                      function=sal_func,
                                      args=sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(pracsal_f)
        funcs['PRACSAL'] = expr_id

        sal_pmap = {'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_ctxt = ParameterContext(name='PRACSAL',
                                    parameter_type='function',
                                    parameter_function_id=expr_id,
                                    parameter_function_map=sal_pmap,
                                    value_encoding='float32',
                                    units='g kg-1')
        sal_ctxt_id = self.dataset_management.create_parameter(sal_ctxt)
        contexts['PRACSAL'] = sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        return contexts, funcs

    def test_verify_contexts(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)
        pcontexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=pdict_id)
        for pcontext in pcontexts:
            self.assertTrue('fill_value' in pcontext)
            self.assertTrue('reference_urls' in pcontext)
            self.assertTrue('internal_name' in pcontext)
            self.assertTrue('display_name' in pcontext)
            self.assertTrue('standard_name' in pcontext)
            self.assertTrue('ooi_short_name' in pcontext)
            self.assertTrue('description' in pcontext)
            self.assertTrue('precision' in pcontext)
コード例 #3
0
    def on_start(self):

        ImmediateProcess.on_start(self)

        # necessary arguments, passed in via configuration kwarg to schedule_process. process namespace to avoid collisions
        fuc_id = self.CFG.get_safe('process.fuc_id',None) # FileUploadContext ID
        dp_id = self.CFG.get_safe('process.dp_id',None) # DataProduct ID

        # clients we'll need
        resource_registry = self.container.resource_registry
        object_store = self.container.object_store
        dataset_management =  DatasetManagementServiceClient()
        data_product_management = DataProductManagementServiceClient()

        # get the Object (dict) containing details of the uploaded file
        fuc = object_store.read(fuc_id)

        # get the ParameterContexts associated with this DataProduct
        sd_id = resource_registry.find_objects(dp_id, PRED.hasStreamDefinition, id_only=True)[0][0] # TODO loop
        pd_id = resource_registry.find_objects(sd_id, PRED.hasParameterDictionary, id_only=True)[0][0] # TODO loop
        pc_list, _ = resource_registry.find_objects(pd_id, PRED.hasParameterContext, id_only=False) # parameter contexts

        # NetCDF file open here
        nc_filename = fuc.get('path', None)
        if nc_filename is None:
            raise BadRequest("uploaded file has no path")

        # keep track of the number of fields we actually process
        nfields = 0

        with netCDF4.Dataset(nc_filename,'r') as nc:

            nc_time = nc.variables['time'][:] # don't modify nc_time below, read once use many times

            for v in nc.variables:
                variable = nc.variables[v]
                nc_name = str(v) # name of variable should be the same as what was downloaded, we'll append the c here
                # check for REQUIRED attributes
                author = getattr(variable, 'author', None)
                reason = getattr(variable, 'reason', None)
                if not all([author,reason]):
                    log.info('skipping parameter %s - no author or reason' % nc_name)
                    continue
                # get all ParameterContexts (from pc_list) with this 'name' (should be one at the moment)
                pc_matches_nc_name_list = [c for c in pc_list if c.name == nc_name]
                # is variable already present?
                if len(pc_matches_nc_name_list) < 1:
                    log.info('skipping parameter %s - not found in ParameterContexts associated with DataProduct' % nc_name)
                    continue

                # we are using this ParameterContext as a copy
                pc = pc_matches_nc_name_list[0] # TODO should only have 1 context per 'name' but could be checked for completeness
                # only allow L1/L2 paramters (check against ooi_short_name which should end with this)
                m = re.compile('(_L[12])$').search(pc.ooi_short_name.upper()) # capture L1/L2 for use in new name
                if not m: # if not _L1 or _L2 move on
                    log.info('skipping parameter %s - not L1 or L2' % nc_name)
                    continue
                processing_level = m.group(1)
                # remove attributes we should not copy [_id,_rev,ts_created,ts_updated]
                delattr(pc, '_id')
                delattr(pc, '_rev')
                delattr(pc, 'ts_created')
                delattr(pc, 'ts_updated')
                # append L[12]c to name attribute (new parameter name)
                c_name = ''.join([pc['name'],processing_level,'c'])
                pc['name'] = c_name
                # copy attributes from NetCDF file
                pc['units'] = variable.units
                pc['value_encoding'] = str(variable.dtype)
                #TODO ERDAP files don't have fill_value, but should probably get from there, leaving copy for now
                # create ParameterContext
                pc_id = dataset_management.create_parameter(pc)
                data_product_management.add_parameter_to_data_product(pc_id,dp_id)

                # get NetCDF data for this variable
                nc_data = variable[:]

                with DirectCoverageAccess() as dca:
                    # get the Dataset IDs associated with this DataProduct
                    ds_id_list, _ = resource_registry.find_objects(dp_id, PRED.hasDataset, id_only=True)
                    for ds_id in ds_id_list: # could be multiple Datasets for this DataProduct
                        with dca.get_editable_coverage(ds_id) as cov: # <-- This pauses ingestion
                            # times in this Dataset
                            cov_time = cov._range_value['time'][:]
                            # subset nc_time (only where nc_time matches cov_time)
                            nc_indicies = [i for i,x in enumerate(nc_time) if x in cov_time]
                            subset_nc_time = nc_time[nc_indicies] + 2208988800 # TODO REMOVE THIS? ERDAP 1970 vs NTP 1900
                            # don't forget to subset the data too
                            subset_nc_data = [nc_data[i] for i in nc_indicies]
                            # use indicies of where subset_nc_time exists in cov_time to update coverage
                            cov_indicies = np.flatnonzero(np.in1d(cov_time, subset_nc_time)) # returns numpy.ndarray of indicies
                            cov_indicies = list(cov_indicies) # converts to list for coverage
                            #cov._range_value[c_name][cov_indicies] = subset_nc_data # TODO this should eventually work
                            for i,x in enumerate(cov_indicies):
                                cov._range_value[c_name][x] = subset_nc_data[i]

                nfields = nfields + 1

        fuc['status'] = 'UploadDataProcessing process complete - %d fields created/updated' % nfields
        self.container.object_store.update_doc(fuc)

        # remove uploaded file
        try:
            os.remove(nc_filename)
        except OSError:
            pass # TODO take action to get this removed
コード例 #4
0
class DatasetManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry  = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        tdom, sdom = time_series_domain()
        dataset_id = self.dataset_management.create_dataset(name='ctd_dataset', parameter_dictionary_id=pdict_id, spatial_domain=sdom.dump(), temporal_domain=tdom.dump())

        ds_obj = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, 'ctd_dataset')
        
        ds_obj.name = 'something different'
        self.dataset_management.update_dataset(ds_obj)
        ds_obj2 = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, ds_obj2.name)
    
    def test_context_crud(self):
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        ctxt = self.dataset_management.read_parameter_context(context_id)
        context = DatasetManagementService.get_coverage_parameter(ctxt)
        self.assertIsInstance(context, CoverageParameterContext)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)



    def test_pfunc_crud(self):
        contexts, funcs = self.create_pfuncs()
        context_ids = [context_id for context_id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        expr_id = funcs['CONDWAT_L1']
        expr = self.dataset_management.read_parameter_function(expr_id)
        func_class = DatasetManagementService.get_coverage_function(expr)
        self.assertIsInstance(func_class, NumexprFunction)

    def test_pdict_crud(self):
        context_ids = self.create_contexts()
        pdict_res_id = self.dataset_management.create_parameter_dictionary(name='pdict1', parameter_context_ids=context_ids, temporal_context='time')

        pdict_contexts = self.dataset_management.read_parameter_contexts(parameter_dictionary_id=pdict_res_id, id_only=True)

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_res_id)
        self.assertIsInstance(pdict, ParameterDictionary)
        self.assertTrue('time_test' in pdict)
        self.assertEquals(pdict.identifier, pdict_res_id)

        self.assertEquals(set(pdict_contexts), set(context_ids))

        self.dataset_management.delete_parameter_dictionary(parameter_dictionary_id=pdict_res_id)
        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_dictionary(parameter_dictionary_id=pdict_res_id)

    def create_contexts(self):
        context_ids = []
        cond = ParameterContext(name='condictivity_test',
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='1',
                                fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(cond))

        pres = ParameterContext(name='pressure_test',
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='Pa',
                                fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(pres))

        sal = ParameterContext(name='salinity_test', 
                               parameter_type='quantity',
                               value_encoding='float32',
                               units='psu',
                               fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(sal))

        temp = ParameterContext(name='temp_test', 
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='degree_C',
                                fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(temp))

        time_test = ParameterContext(name='time_test', 
                                     parameter_type='quantity',
                                     value_encoding='float32',
                                     units='seconds since 1970-01-01',
                                     fill_value=0)
        context_ids.append(self.dataset_management.create_parameter(time_test))

        return context_ids

    def create_pfuncs(self):
        contexts = {}
        funcs = {}

        time_ = ParameterContext(name='TIME', 
                                 parameter_type='quantity',
                                 value_encoding='float32',
                                 units='seconds since 1900-01-01',
                                 fill_value=0)

        t_ctxt_id = self.dataset_management.create_parameter(time_)
        contexts['TIME'] = t_ctxt_id

        lat = ParameterContext(name='LAT', 
                               parameter_type='sparse',
                               value_encoding='float32',
                               units='degrees_north',
                               fill_value=-9999.)
        lat_ctxt_id = self.dataset_management.create_parameter(lat)
        contexts['LAT'] = lat_ctxt_id

        lon = ParameterContext(name='LON', 
                               parameter_type="sparse",
                               value_encoding='float32',
                               units='degrees_east',
                               fill_value=-9999)
        lon_ctxt_id = self.dataset_management.create_parameter(lon)
        contexts['LON'] = lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp = ParameterContext(name='TEMPWAT_L0', 
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='deg_C')
        temp_ctxt_id = self.dataset_management.create_parameter(temp)
        contexts['TEMPWAT_L0'] = temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond = ParameterContext(name='CONDWAT_L0', 
                                parameter_type='quantity',
                                value_encoding='float32',
                                units='S m-1')
        cond_ctxt_id = self.dataset_management.create_parameter(cond)
        contexts['CONDWAT_L0'] = cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press = ParameterContext(name='PRESWAT_L0', 
                                 parameter_type='quantity',
                                 value_encoding='float32',
                                 units='dbar')
        press_ctxt_id = self.dataset_management.create_parameter(press)
        contexts['PRESWAT_L0'] = press_ctxt_id


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        tempwat_f = ParameterFunction(name='TEMPWAT_L1',
                                      function_type=PFT.NUMEXPR,
                                      function=tl1_func,
                                      args=['T'])
        expr_id = self.dataset_management.create_parameter_function(tempwat_f)
        funcs['TEMPWAT_L1'] = expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        tempL1 = ParameterContext(name='TEMPWAT_L1', 
                                  parameter_type='function',
                                  parameter_function_id=expr_id,
                                  parameter_function_map=tl1_pmap,
                                  value_encoding='float32',
                                  units='deg_C')
        tempL1_ctxt_id = self.dataset_management.create_parameter(tempL1)
        contexts['TEMPWAT_L1'] = tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        condwat_f = ParameterFunction(name='CONDWAT_L1',
                                      function_type=PFT.NUMEXPR,
                                      function=cl1_func,
                                      args=['C'])
        expr_id = self.dataset_management.create_parameter_function(condwat_f)
        funcs['CONDWAT_L1'] = expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        condL1 = ParameterContext(name='CONDWAT_L1', 
                                       parameter_type='function',
                                       parameter_function_id=expr_id,
                                       parameter_function_map=cl1_pmap,
                                       value_encoding='float32',
                                       units='S m-1')
        condL1_ctxt_id = self.dataset_management.create_parameter(condL1)
        contexts['CONDWAT_L1'] = condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        preswat_f = ParameterFunction(name='PRESWAT_L1',
                                      function_type=PFT.NUMEXPR,
                                      function=pl1_func,
                                      args=['P', 'p_range'])
        expr_id = self.dataset_management.create_parameter_function(preswat_f)
        funcs['PRESWAT_L1'] = expr_id
        
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        presL1 = ParameterContext(name='PRESWAT_L1', 
                                  parameter_type='function',
                                  parameter_function_id=expr_id,
                                  parameter_function_map=pl1_pmap,
                                  value_encoding='float32',
                                  units='dbar')
        presL1_ctxt_id = self.dataset_management.create_parameter(presL1)
        contexts['PRESWAT_L1'] = presL1_ctxt_id

        
        # A magic function that may or may not exist actually forms the line below at runtime.
        cond_f = ParameterFunction(name='condwat10',
                                   function_type=PFT.NUMEXPR,
                                   function='C*10',
                                   args=['C'])
        expr_id = self.dataset_management.create_parameter_function(cond_f)
        cond10 = ParameterContext(name='c10',
                                  parameter_type='function',
                                  parameter_function_id=expr_id,
                                  parameter_function_map={'C':'CONDWAT_L1'},
                                  value_encoding='float32',
                                  units='1')
        cond10_id = self.dataset_management.create_parameter(cond10)
        contexts['C10'] = cond10_id
        
        # Density & practical salinity calucluated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        pracsal_f = ParameterFunction(name='PRACSAL',
                                      function_type=PFT.PYTHON,
                                      owner=owner,
                                      function=sal_func,
                                      args=sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(pracsal_f)
        funcs['PRACSAL'] = expr_id

        sal_pmap = {'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_ctxt = ParameterContext(name='PRACSAL', 
                                    parameter_type='function',
                                    parameter_function_id=expr_id,
                                    parameter_function_map=sal_pmap,
                                    value_encoding='float32',
                                    units='g kg-1')
        sal_ctxt_id = self.dataset_management.create_parameter(sal_ctxt)
        contexts['PRACSAL'] = sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        return contexts, funcs

    def test_verify_contexts(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)
        pcontexts = self.dataset_management.read_parameter_contexts(parameter_dictionary_id=pdict_id)
        for pcontext in pcontexts:
            self.assertTrue('fill_value' in pcontext)
            self.assertTrue('reference_urls' in pcontext)
            self.assertTrue('internal_name' in pcontext)
            self.assertTrue('display_name' in pcontext)
            self.assertTrue('standard_name' in pcontext)
            self.assertTrue('ooi_short_name' in pcontext)
            self.assertTrue('description' in pcontext)
            self.assertTrue('precision' in pcontext)
コード例 #5
0
    def on_start(self):

        ImmediateProcess.on_start(self)

        # necessary arguments, passed in via configuration kwarg to schedule_process. process namespace to avoid collisions
        fuc_id = self.CFG.get_safe('process.fuc_id',None) # FileUploadContext ID
        dp_id = self.CFG.get_safe('process.dp_id',None) # DataProduct ID

        # clients we'll need
        resource_registry = self.container.resource_registry
        object_store = self.container.object_store
        dataset_management =  DatasetManagementServiceClient()
        data_product_management = DataProductManagementServiceClient()

        # get the Object (dict) containing details of the uploaded file
        fuc = object_store.read(fuc_id)

        # get the ParameterContexts associated with this DataProduct
        sd_id = resource_registry.find_objects(dp_id, PRED.hasStreamDefinition, id_only=True)[0][0] # TODO loop
        pd_id = resource_registry.find_objects(sd_id, PRED.hasParameterDictionary, id_only=True)[0][0] # TODO loop
        pc_list, _ = resource_registry.find_objects(pd_id, PRED.hasParameterContext, id_only=False) # parameter contexts

        # NetCDF file open here
        nc_filename = fuc.get('path', None)
        if nc_filename is None:
            raise BadRequest("uploaded file has no path")

        # keep track of the number of fields we actually process
        nfields = 0

        with netCDF4.Dataset(nc_filename,'r') as nc:

            nc_time = nc.variables['time'][:] # don't modify nc_time below, read once use many times

            for v in nc.variables:
                variable = nc.variables[v]
                nc_name = str(v) # name of variable should be the same as what was downloaded, we'll append the c here
                # check for REQUIRED attributes
                author = getattr(variable, 'author', None)
                reason = getattr(variable, 'reason', None)
                if not all([author,reason]):
                    log.info('skipping parameter %s - no author or reason' % nc_name)
                    continue
                # get all ParameterContexts (from pc_list) with this 'name' (should be one at the moment)
                pc_matches_nc_name_list = [c for c in pc_list if c.name == nc_name]
                # is variable already present?
                if len(pc_matches_nc_name_list) < 1:
                    log.info('skipping parameter %s - not found in ParameterContexts associated with DataProduct' % nc_name)
                    continue

                # we are using this ParameterContext as a copy
                pc = pc_matches_nc_name_list[0] # TODO should only have 1 context per 'name' but could be checked for completeness
                # only allow L1/L2 paramters (check against ooi_short_name which should end with this)
                m = re.compile('(_L[12])$').search(pc.ooi_short_name.upper()) # capture L1/L2 for use in new name
                if not m: # if not _L1 or _L2 move on
                    log.info('skipping parameter %s - not L1 or L2' % nc_name)
                    continue
                processing_level = m.group(1)
                # remove attributes we should not copy [_id,_rev,ts_created,ts_updated]
                delattr(pc, '_id')
                delattr(pc, '_rev')
                delattr(pc, 'ts_created')
                delattr(pc, 'ts_updated')
                # append L[12]c to name attribute (new parameter name)
                c_name = ''.join([pc['name'],processing_level,'c'])
                pc['name'] = c_name
                # copy attributes from NetCDF file
                pc['units'] = variable.units
                pc['value_encoding'] = str(variable.dtype)
                #TODO ERDAP files don't have fill_value, but should probably get from there, leaving copy for now
                # create ParameterContext
                pc_id = dataset_management.create_parameter(pc)
                data_product_management.add_parameter_to_data_product(pc_id,dp_id)

                # get NetCDF data for this variable
                nc_data = variable[:]

                with DirectCoverageAccess() as dca:
                    # get the Dataset IDs associated with this DataProduct
                    ds_id_list, _ = resource_registry.find_objects(dp_id, PRED.hasDataset, id_only=True)
                    for ds_id in ds_id_list: # could be multiple Datasets for this DataProduct
                        with dca.get_editable_coverage(ds_id) as cov: # <-- This pauses ingestion
                            # times in this Dataset
                            cov_time = cov.get_parameter_values(['time']).get_data()['time']
                            # subset nc_time (only where nc_time matches cov_time)
                            nc_indicies = [i for i,x in enumerate(nc_time) if x in cov_time]
                            subset_nc_time = nc_time[nc_indicies] + 2208988800 # TODO REMOVE THIS? ERDAP 1970 vs NTP 1900
                            # don't forget to subset the data too
                            subset_nc_data = [nc_data[i] for i in nc_indicies]
                            # use indicies of where subset_nc_time exists in cov_time to update coverage
                            cov_indicies = np.flatnonzero(np.in1d(cov_time, subset_nc_time)) # returns numpy.ndarray of indicies
                            cov_indicies = list(cov_indicies) # converts to list for coverage
                            #cov._range_value[c_name][cov_indicies] = subset_nc_data # TODO this should eventually work
                            for i,x in enumerate(cov_indicies):
                                cov._range_value[c_name][x] = subset_nc_data[i]

                nfields = nfields + 1

        fuc['status'] = 'UploadDataProcessing process complete - %d fields created/updated' % nfields
        self.container.object_store.update_doc(fuc)

        # remove uploaded file
        try:
            os.remove(nc_filename)
        except OSError:
            pass # TODO take action to get this removed