Code Example #1
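Integration test for the TransformPrime data process. It builds L0 and L1 parameter dictionaries (the L1 parameters are derived through NumexprFunction/PythonFunction definitions, with practical salinity and density computed via the gsw library), wires an L0 data product to an L1 data product through a data process, publishes a granule on the input stream, and validates the derived values on the output.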
class TestTransformPrime(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml') # start the full service deployment used by these tests

        self.dataset_management      = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management       = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()


    def _create_proc_def(self):
        dpd_obj = DataProcessDefinition(
            name='Optimus',
            description='It\'s a transformer',
            module='ion.processes.data.transforms.transform_prime',
            class_name='TransformPrime')
        return self.data_process_management.create_data_process_definition(dpd_obj)
    

    def _L0_pdict(self):

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management.create_parameter_context(name='time', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', unit_of_measure=t_ctxt.uom)

        lat_ctxt = ParameterContext('lat', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))))
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt.fill_value = -9999
        lat_ctxt_id = self.dataset_management.create_parameter_context(name='lat', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=lat_ctxt.uom)

        lon_ctxt = ParameterContext('lon', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))))
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt.fill_value = -9999
        lon_ctxt_id = self.dataset_management.create_parameter_context(name='lon', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=lon_ctxt.uom)


        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        temp_ctxt.uom = 'deg_C'
        temp_ctxt.fill_value = -9999
        temp_ctxt_id = self.dataset_management.create_parameter_context(name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=temp_ctxt.uom)

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        cond_ctxt.uom = 'S m-1'
        cond_ctxt.fill_value = -9999
        cond_ctxt_id = self.dataset_management.create_parameter_context(name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=cond_ctxt.uom)

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')))
        press_ctxt.uom = 'dbar'
        press_ctxt.fill_value = -9999
        press_ctxt_id = self.dataset_management.create_parameter_context(name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=press_ctxt.uom)


        context_ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id]

        pdict_id = self.dataset_management.create_parameter_dictionary('L0 SBE37', parameter_context_ids=context_ids, temporal_context='time')

        return pdict_id


    def _L1_pdict(self):
        pdict_id = self._L0_pdict()
        param_context_ids = self.dataset_management.read_parameter_contexts(pdict_id,id_only=True)


        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(TEMPWAT_L0 / 10000) - 10'
        tl1_pmap = {'TEMPWAT_L0':'TEMPWAT_L0'}
        func = NumexprFunction('TEMPWAT_L1', tl1_func, tl1_pmap)
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'

        tempL1_ctxt_id = self.dataset_management.create_parameter_context(name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=tempL1_ctxt.uom)
        param_context_ids.append(tempL1_ctxt_id)

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(CONDWAT_L0 / 100000) - 0.5'
        cl1_pmap = {'CONDWAT_L0':'CONDWAT_L0'}
        func = NumexprFunction('CONDWAT_L1', cl1_func, cl1_pmap)
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=condL1_ctxt.uom)
        param_context_ids.append(condL1_ctxt_id)
                

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(PRESWAT_L0 * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)'
        pl1_pmap = {'PRESWAT_L0':'PRESWAT_L0'}
        func = NumexprFunction('PRESWAT_L1', pl1_func, pl1_pmap)
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=presL1_ctxt.uom)
        param_context_ids.append(presL1_ctxt_id)

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = [NumexprFunction('CONDWAT_L1*10', 'C*10', {'C':'CONDWAT_L1'}), 'TEMPWAT_L1', 'PRESWAT_L1']
        sal_kwargmap = None
        func = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap)
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(func), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'

        sal_ctxt_id = self.dataset_management.create_parameter_context(name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', unit_of_measure=sal_ctxt.uom)
        param_context_ids.append(sal_ctxt_id)

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_func = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'lon','lat'], None)
        #abs_sal_func = PythonFunction('abs_sal', owner, 'SA_from_SP', ['lon','lat'], None)
        cons_temp_func = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_func, 'TEMPWAT_L1', 'PRESWAT_L1'], None)
        dens_func = PythonFunction('DENSITY', owner, 'rho', [abs_sal_func, cons_temp_func, 'PRESWAT_L1'], None)
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_func), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'

        dens_ctxt_id = self.dataset_management.create_parameter_context(name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', unit_of_measure=dens_ctxt.uom)
        param_context_ids.append(dens_ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary('L1_SBE37', parameter_context_ids=param_context_ids, temporal_context='time')
        return pdict_id

    def _data_product(self, name, stream_def, exchange_pt):
        tdom, sdom = time_series_domain()
        dp_obj = DataProduct(name=name, description='blah', spatial_domain=sdom.dump(), temporal_domain=tdom.dump())
        dp_id = self.data_product_management.create_data_product(dp_obj, stream_def, exchange_pt)
        return dp_id

    def _data_process(self, proc_def_id, input_products, output_product, stream_def):
        fake_producer = DataProducer(name='fake_producer')

        fake_producer_id, _  = self.container.resource_registry.create(fake_producer)

        self.data_process_management.assign_stream_definition_to_data_process_definition(stream_def,proc_def_id,binding='output')
        
        data_process_id = self.data_process_management.create_data_process(proc_def_id, input_products, {'output':output_product})
        self.container.resource_registry.create_association(subject=data_process_id, predicate=PRED.hasDataProducer, object=fake_producer_id)

        self.data_process_management.activate_data_process(data_process_id)

    def _fake_producer(self):
        if not hasattr(self, 'fake_producer_id'):
            self.fake_producer_id, _ = self.container.resource_registry.create(DataProducer(name='fake_producer'))
        return self.fake_producer_id

    def _publisher(self, data_product_id):
        stream_ids, _ = self.container.resource_registry.find_resources(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]

        route = self.pubsub_management.read_stream_route(stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        return publisher
    
    def _get_param_vals(self, name, slice_, dims):
        shp = utils.slice_shape(slice_, dims)
        def _getarr(vmin, shp, vmax=None):
            if vmax is None:
                # ndarray.fill() returns None, so build the array and return it explicitly
                arr = np.empty(shp)
                arr.fill(vmin)
                return arr
            return np.arange(vmin, vmax, (vmax - vmin) / int(utils.prod(shp)), dtype='float32').reshape(shp)

        if name == 'LAT':
            ret = np.empty(shp)
            ret.fill(45)
        elif name == 'LON':
            ret = np.empty(shp)
            ret.fill(-71)
        elif name == 'TEMPWAT_L0':
            ret = _getarr(280000, shp, 350000)
        elif name == 'CONDWAT_L0':
            ret = _getarr(100000, shp, 750000)
        elif name == 'PRESWAT_L0':
            ret = _getarr(3000, shp, 10000)
        elif name in self.value_classes: # Non-L0 parameters
            ret = self.value_classes[name][:]
        else:
            return np.zeros(shp)

        return ret
    
    def _setup_streams(self, exchange_pt1, exchange_pt2, available_fields_in=[], available_fields_out=[]):
        proc_def_id = self._create_proc_def()

        incoming_pdict_id = self._L0_pdict()
        outgoing_pdict_id = self._L1_pdict()
        
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('L0_stream_def', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('L1_stream_def', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)

        L0_data_product_id = self._data_product('L0_SBE37', incoming_stream_def_id, exchange_pt1)
        L1_data_product_id = self._data_product('L1_SBE37', outgoing_stream_def_id, exchange_pt2)

        self._data_process(proc_def_id, [L0_data_product_id], L1_data_product_id, outgoing_stream_def_id)
        
        stream_ids, _ = self.container.resource_registry.find_objects(L0_data_product_id, PRED.hasStream, None, True)
        stream_id_in = stream_ids[0]

        stream_ids, _ = self.container.resource_registry.find_objects(L1_data_product_id, PRED.hasStream, None, True)
        stream_id_out = stream_ids[0]
        
        stream_route_in = self.pubsub_management.read_stream_route(stream_id_in)
        stream_route_out = self.pubsub_management.read_stream_route(stream_id_out)

        return (stream_id_in,stream_id_out,stream_route_in,stream_route_out,incoming_stream_def_id,outgoing_stream_def_id)
    
    def _validate_transforms(self, rdt_in, rdt_out):
        #passthrus
        self.assertTrue(np.allclose(rdt_in['time'], rdt_out['time']))
        self.assertTrue(np.allclose(rdt_in['lat'], rdt_out['lat']))
        self.assertTrue(np.allclose(rdt_in['lon'], rdt_out['lon']))
        self.assertTrue(np.allclose(rdt_in['TEMPWAT_L0'], rdt_out['TEMPWAT_L0']))
        self.assertTrue(np.allclose(rdt_in['CONDWAT_L0'], rdt_out['CONDWAT_L0']))
        self.assertTrue(np.allclose(rdt_in['PRESWAT_L0'], rdt_out['PRESWAT_L0']))
        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        t1 = (rdt_out['TEMPWAT_L0'] / 10000) - 10
        self.assertTrue(np.allclose(rdt_out['TEMPWAT_L1'], t1))
        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        c1 = (rdt_out['CONDWAT_L0'] / 100000) - 0.5
        self.assertTrue(np.allclose(rdt_out['CONDWAT_L1'], c1))
        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        p1 = (rdt_out['PRESWAT_L0'] * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)
        self.assertTrue(np.allclose(rdt_out['PRESWAT_L1'], p1))
        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        ps = gsw.SP_from_C((rdt_out['CONDWAT_L1'] * 10.), rdt_out['TEMPWAT_L1'], rdt_out['PRESWAT_L1'])
        self.assertTrue(np.allclose(rdt_out['PRACSAL'], ps))
        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        abs_sal = gsw.SA_from_SP(rdt_out['PRACSAL'], rdt_out['PRESWAT_L1'], rdt_out['lon'], rdt_out['lat'])
        cons_temp = gsw.CT_from_t(abs_sal, rdt_out['TEMPWAT_L1'], rdt_out['PRESWAT_L1'])
        rho = gsw.rho(abs_sal, cons_temp, rdt_out['PRESWAT_L1'])
        self.assertTrue(np.allclose(rdt_out['DENSITY'], rho))
    
    def test_execute_transform(self):
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1','CONDWAT_L1','PRESWAT_L1','PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        
        msg = rdt_in.to_granule()
        #pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',{'process':{'routes':{(stream_id_in, stream_id_out):None},'stream_id':stream_id_out}})
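        #process config: 'routes' maps the (input, output) stream pair, 'queue_name' names the input queue, and 'publish_streams' registers a publisher for the output stream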
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        rdt_out = self.container.proc_manager.procs[pid]._execute_transform(msg, (stream_id_in,stream_id_out))
        #need below to wrap result in a param val object
        rdt_out = RecordDictionaryTool.load_from_granule(rdt_out.to_granule())
        for k,v in rdt_out.iteritems():
            self.assertEqual(len(v), dt)        
        
        self._validate_transforms(rdt_in, rdt_out)
        self.container.proc_manager.terminate_process(pid)
    
    def test_transform_prime_no_available_fields(self):
        available_fields_in = []
        available_fields_out = []
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)
        
        #launch transform
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        
        #create publish
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

        #data
        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        msg = rdt_in.to_granule()
        #publish granule to transform and have transform publish it to subscriber
        
        #validate transformed data
        e = gevent.event.Event()
        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
            for k,v in rdt_out.iteritems():
                self.assertEquals(rdt_out[k], None)
            e.set()

        sub = StandaloneStreamSubscriber('stream_subscriber', cb)
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()
        
        #publish msg to transform
        publisher.publish(msg)
        
        #wait to receive msg
        self.assertTrue(e.wait(4))

        #self.container.proc_manager.terminate_process(pid)

    def test_transform_prime(self):
        available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1','CONDWAT_L1','PRESWAT_L1','PRACSAL', 'DENSITY']
        exchange_pt1 = 'xp1'
        exchange_pt2 = 'xp2'
        stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)
        
        #launch transform
        config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
        pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)
        
        #create publish
        publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
        self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

        #data
        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
        dt = 20
        rdt_in['time'] = np.arange(dt)
        rdt_in['lat'] = [40.992469] * dt
        rdt_in['lon'] = [-71.727069] * dt
        rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
        rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
        rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
        msg = rdt_in.to_granule()
        #publish granule to transform and have transform publish it to subscriber
        
        #validate transformed data
        e = gevent.event.Event()
        def cb(msg, sr, sid):
            self.assertEqual(sid, stream_id_out)
            rdt_out = RecordDictionaryTool.load_from_granule(msg)
            self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
            self._validate_transforms(rdt_in, rdt_out)
            e.set()

        sub = StandaloneStreamSubscriber('stream_subscriber', cb)
        sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
        self.addCleanup(sub.stop)
        sub.start()
        
        #publish msg to transform
        publisher.publish(msg)
        
        #wait to receive msg
        self.assertTrue(e.wait(4))
Code Example #2
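Integration test for the CTDBP_L0_all transform: it builds a parameter dictionary, creates input and output data products, launches and activates the data process, publishes a synthetic parsed CTD granule on the input stream, and subscribes to the transform's L0 output.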
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            if isinstance(
                    rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array(
                    [random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()
        self.i += length

        return g

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext(
            'time',
            param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext(
            'conductivity',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = ''
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext(
            'pressure',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = ''
        pdict.add_context(pres_ctxt)

        temp_ctxt = ParameterContext(
            'temperature',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        temp_ctxt.uom = ''
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext(
            'density',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = ''
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext(
            'salinity',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = ''
        pdict.add_context(sal_ctxt)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(
                pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context,
                            ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(
            parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description=
            'Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(
            dpd_obj)
        self.addCleanup(
            self.data_process_management.delete_data_process_definition,
            dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        input_param_dict = self._create_input_param_dict_for_test(
            parameter_dict_name='fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
        #        input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True)
        input_stream_def_dict = self.pubsub.create_stream_definition(
            name='parsed', parameter_dictionary_id=input_param_dict)
        self.addCleanup(self.pubsub.delete_stream_definition,
                        input_stream_def_dict)

        log.debug("Got the parsed parameter dictionary: id: %s",
                  input_param_dict)
        log.debug("Got the stream def for parsed input: %s",
                  input_stream_def_dict)

        # Input data product
        parsed_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTDBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        input_dp_id = self.dataproduct_management.create_data_product(
            data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_dict)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        input_dp_id)

        # output data product
        L0_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTDBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(
            data_product=L0_stream_dp_obj,
            stream_definition_id=input_stream_def_dict)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        L0_stream_dp_id)

        # We need the key name here to be "L0_stream", since when the data process is launched, this name goes into
        # the config as in config.process.publish_streams.L0_stream when the config is used to launch the data process
        out_stream_ids, _ = self.resource_registry.find_objects(
            L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process(
            data_process_definition_id=dprocdef_id,
            in_data_product_ids=[input_dp_id],
            out_data_product_ids=[L0_stream_dp_id],
            configuration=None)

        self.addCleanup(self.data_process_management.delete_data_process,
                        dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process,
                        dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(
            input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub',
                                         callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
                                                 stream_ids=[output_stream_id],
                                                 exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=input_stream_def_dict, length=5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s",
                  granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)

    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """

        pass
Code Example #3
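CRUD-style integration tests for the dataset management service, covering datasets, parameter contexts, and parameter dictionaries created through DatasetManagementServiceClient.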
class DatasetManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry  = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        tdom, sdom = time_series_domain()
        dataset_id = self.dataset_management.create_dataset(name='ctd_dataset', parameter_dictionary_id=pdict_id, spatial_domain=sdom.dump(), temporal_domain=tdom.dump())

        ds_obj = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, 'ctd_dataset')
        
        ds_obj.name = 'something different'
        self.dataset_management.update_dataset(ds_obj)
        self.dataset_management.register_dataset(dataset_id)
        ds_obj2 = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, ds_obj2.name)
        self.assertTrue(ds_obj2.registered)

   
    
    def test_context_crud(self):
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        context = DatasetManagementService.get_parameter_context(context_id)
        self.assertIsInstance(context, ParameterContext)
        self.assertEquals(context.identifier, context_id)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)

    def test_pdict_crud(self):
        context_ids = self.create_contexts()
        pdict_res_id = self.dataset_management.create_parameter_dictionary(name='pdict1', parameter_context_ids=context_ids, temporal_context='time_test')

        pdict_contexts = self.dataset_management.read_parameter_contexts(parameter_dictionary_id=pdict_res_id, id_only=True)

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_res_id)
        self.assertIsInstance(pdict, ParameterDictionary)
        self.assertTrue('time_test' in pdict)
        self.assertEquals(pdict.identifier, pdict_res_id)

        self.assertEquals(set(pdict_contexts), set(context_ids))

        self.dataset_management.delete_parameter_dictionary(parameter_dictionary_id=pdict_res_id)
        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_dictionary(parameter_dictionary_id=pdict_res_id)

    def create_contexts(self):
        context_ids = []
        cond_ctxt = ParameterContext('conductivity_test', param_type=QuantityType(value_encoding=np.float32))
        cond_ctxt.uom = 'unknown'
        cond_ctxt.fill_value = 0e0
        context_ids.append(self.dataset_management.create_parameter_context(name='conductivity_test', parameter_context=cond_ctxt.dump()))

        pres_ctxt = ParameterContext('pressure_test', param_type=QuantityType(value_encoding=np.float32))
        pres_ctxt.uom = 'Pascal'
        pres_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(name='pressure_test', parameter_context=pres_ctxt.dump()))

        sal_ctxt = ParameterContext('salinity_test', param_type=QuantityType(value_encoding=np.float32))
        sal_ctxt.uom = 'PSU'
        sal_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(name='salinity_test', parameter_context=sal_ctxt.dump()))

        temp_ctxt = ParameterContext('temp_test', param_type=QuantityType(value_encoding=np.float32))
        temp_ctxt.uom = 'degree_Celsius'
        temp_ctxt.fill_value = 0e0
        context_ids.append(self.dataset_management.create_parameter_context(name='temp_test', parameter_context=temp_ctxt.dump()))

        t_ctxt = ParameterContext('time_test', param_type=QuantityType(value_encoding=np.int64))
        t_ctxt.uom = 'seconds since 1970-01-01'
        t_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(name='time_test', parameter_context=t_ctxt.dump()))

        return context_ids
Code Example #4
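Resource-setup helper for an external dataset agent test: it registers an ExternalDatasetAgent and agent instance, an external data provider, a data source, and an ExternalDataset pointing at a USGS netCDF file, then builds the parameter dictionary, stream definition, and data product, and fills in the driver configuration for the data handler.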
    def _setup_resources(self):
        # TODO: some or all of this (or some variation) should move to DAMS'

        # Build the test resources for the dataset
        dms_cli = DatasetManagementServiceClient()
        dams_cli = DataAcquisitionManagementServiceClient()
        dpms_cli = DataProductManagementServiceClient()
        rr_cli = ResourceRegistryServiceClient()
        pubsub_cli = PubsubManagementServiceClient()

        eda = ExternalDatasetAgent(name='example eda',handler_module=self.DVR_CONFIG['dvr_mod'],
            handler_class=self.DVR_CONFIG['dvr_cls'])
        eda_id = dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance(name='example eda instance')
        eda_inst_id = dams_cli.create_external_dataset_agent_instance(eda_inst,
            external_dataset_agent_id=eda_id)

        # Create and register the necessary resources/objects

        # Create DataProvider
        dprov = ExternalDataProvider(name='example data provider', institution=Institution(),
            contact=ContactInformation())
        dprov.contact.individual_names_given = 'Christopher Mueller'
        dprov.contact.email = '*****@*****.**'

        # Create DataSource
        dsrc = DataSource(name='example datasource', protocol_type='DAP', institution=Institution(),
            contact=ContactInformation())
        dsrc.connection_params['base_data_url'] = ''
        dsrc.contact.individual_names_given = 'Tim Giguere'
        dsrc.contact.email = '*****@*****.**'

        # Create ExternalDataset
        ds_name = 'usgs_test_dataset'
        dset = ExternalDataset(name=ds_name,
            dataset_description=DatasetDescription(),
            update_description=UpdateDescription(),
            contact=ContactInformation())

        # The usgs.nc test dataset is a download of the R1 dataset found here:
        # http://thredds-test.oceanobservatories.org/thredds/dodsC/ooiciData/E66B1A74-A684-454A-9ADE-8388C2C634E5.ncml
        dset.dataset_description.parameters['dataset_path'] = 'test_data/usgs.nc'
        dset.dataset_description.parameters['temporal_dimension'] = 'time'
        dset.dataset_description.parameters['zonal_dimension'] = 'lon'
        dset.dataset_description.parameters['meridional_dimension'] = 'lat'
        dset.dataset_description.parameters['vertical_dimension'] = 'z'
        dset.dataset_description.parameters['variables'] = [
            'water_temperature',
            'streamflow',
            'water_temperature_bottom',
            'water_temperature_middle',
            'specific_conductance',
            'data_qualifier', ]

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name='dap_model')
        #dsrc_model.model = 'DAP'
        dsrc_model.data_handler_module = 'N/A'
        dsrc_model.data_handler_class = 'N/A'

        ## Run everything through DAMS
        ds_id = dams_cli.create_external_dataset(external_dataset=dset)
        ext_dprov_id = dams_cli.create_external_data_provider(external_data_provider=dprov)
        ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = dams_cli.register_external_data_set(external_dataset_id=ds_id)

        # Or using each method
        dams_cli.assign_data_source_to_external_data_provider(data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        dams_cli.assign_data_source_to_data_model(data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        dams_cli.assign_external_dataset_to_data_source(external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        dams_cli.assign_external_dataset_to_agent_instance(external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
        #        dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=self.eda_id, agent_instance_id=self.eda_inst_id)

        #create temp streamdef so the data product can create the stream
        pc_list = []

        #Get 'time' parameter context
        pc_list.append(dms_cli.read_parameter_context_by_name('time', id_only=True))

        for pc_k, pc in self._create_parameter_dictionary().iteritems():
            pc_list.append(dms_cli.create_parameter_context(pc_k, pc[1].dump()))

        pdict_id = dms_cli.create_parameter_dictionary('netcdf_param_dict', pc_list)

        #create temp streamdef so the data product can create the stream
        streamdef_id = pubsub_cli.create_stream_definition(name="netcdf", description="netcdf", parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()
        tdom, sdom = tdom.dump(), sdom.dump()

        dprod = IonObject(RT.DataProduct,
            name='usgs_parsed_product',
            description='parsed usgs product',
            temporal_domain=tdom,
            spatial_domain=sdom)

        # Generate the data product and associate it to the ExternalDataset
        dproduct_id = dpms_cli.create_data_product(data_product=dprod,
            stream_definition_id=streamdef_id)

        dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id)

        stream_id, assn = rr_cli.find_objects(subject=dproduct_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        stream_id = stream_id[0]

        log.info('Created resources: {0}'.format({'ExternalDataset': ds_id, 'ExternalDataProvider': ext_dprov_id, 'DataSource': ext_dsrc_id, 'DataSourceModel': ext_dsrc_model_id, 'DataProducer': dproducer_id, 'DataProduct': dproduct_id, 'Stream': stream_id}))

        # Create the logger for receiving publications
        _, stream_route, _ = self.create_stream_and_logger(name='usgs', stream_id=stream_id)

        self.EDA_RESOURCE_ID = ds_id
        self.EDA_NAME = ds_name
        self.DVR_CONFIG['dh_cfg'] = {
            'TESTING': True,
            'stream_id': stream_id,
            'stream_route': stream_route,
            'stream_def': streamdef_id,
            'data_producer_id': dproducer_id,  # CBM: Should this be put in the main body of the config - with mod & cls?
            'max_records': 1,
            }
Code Example #5
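End-to-end data management integration tests: granules published on a stream are ingested and persisted, then retrieved and replayed through the data retriever; additional tests cover coverage transforms, QC events, and lookup-value ingestion and replay.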
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher   = ProcessDispatcherServiceClient()
        self.pubsub_management    = PubsubManagementServiceClient()
        self.resource_registry    = ResourceRegistryServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever       = DataRetrieverServiceClient()
        self.pids                 = []
        self.event                = Event()
        self.exchange_space_name  = 'test_granules'
        self.exchange_point_name  = 'science_data'       
        self.i                    = 0

        self.purge_queues()
        self.queue_buffer         = []
        self.streams = []
        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        xn = self.container.ex_manager.create_xn_queue('science_granule_ingestion')
        xn.purge()
        

    def tearDown(self):
        self.purge_queues()
        for pid in self.pids:
            self.container.proc_manager.terminate_process(pid)
        IngestionManagementIntTest.clean_subscriptions()
        for queue in self.queue_buffer:
            if isinstance(queue, ExchangeNameQueue):
                queue.delete()
            elif isinstance(queue, str):
                xn = self.container.ex_manager.create_xn_queue(queue)
                xn.delete()

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------
        
    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset
        '''
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        dataset_id = self.dataset_management.create_dataset('test_dataset_%i'%self.i, parameter_dictionary_id=parameter_dict_id, spatial_domain=sdom, temporal_domain=tdom)
        return dataset_id
    
    def get_datastore(self, dataset_id):
        '''
        Gets an instance of the datastore
            This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes 
            delete a CouchDB datastore and the other containers are unaware of the new state of the datastore.
        '''
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore
    
    def get_ingestion_config(self):
        '''
        Grab the ingestion configuration from the resource registry
        '''
        # The ingestion configuration should have been created by the bootstrap service 
        # which is configured through r2deploy.yml

        ingest_configs, _  = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=''):
        '''
        Launch the producer
        '''

        pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}})

        self.pids.append(pid)

    def make_simple_dataset(self):
        '''
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests
        '''
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
        stream_id, route     = self.pubsub_management.create_stream('ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id)

        dataset_id = self.create_dataset(pdict_id)

        self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self,stream_id,stream_route,offset=0):
        '''
        Publish deterministic data
        '''

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self,stream_id, route):
        '''
        Make four granules
        '''
        for i in xrange(4):
            self.publish_hifi(stream_id,route,i)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Starts ingestion/persistence for a given dataset
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
    
    def stop_ingestion(self, stream_id):
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id)
        
    def stop_all_ingestion(self):
        try:
            for sid in self.streams:
                self.stop_ingestion(sid)
        except Exception:
            pass

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format
        '''
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')[0]
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt     = RecordDictionaryTool.load_from_granule(granule)
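                # Done once real (non-fill) time values exist and the dataset extent reaches data_size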
                if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)




    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    @attr('SMOKE') 
    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        
        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)


        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)




        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream 
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        
        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        
        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id =  self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

    
        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()
        
        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)


        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b,bool) else b)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)


    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        
        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_array_almost_equal(rdt_out['density'], np.array([1021.7144739593881]))
        np.testing.assert_array_almost_equal(rdt_out['salinity'], np.array([30.935132729668283]))


    def test_qc_events(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_qc_pdict()
        stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.arange(10) * 3

        verified = Event()
        def verification(event, *args, **kwargs):
            self.assertEquals(event.qc_parameter, 'temp_qc')
            self.assertEquals(event.temporal_value, 7)
            verified.set()

        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))



    def test_lookup_values_ingest_replay(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsub_management.create_stream_definition('lookups', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()
        config.process.lookup_docs = ['test1', 'test2']
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        stored_value_manager = StoredValueManager(self.container)
        stored_value_manager.stored_value_cas('test1',{'offset_a':10.0, 'offset_b':13.1})
        
        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = [20.0] * 20

        granule = rdt.to_granule()

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(30))
        
        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(20))
        np.testing.assert_array_almost_equal(rdt_out['temp'], np.array([20.] * 20))
        np.testing.assert_array_almost_equal(rdt_out['calibrated'], np.array([30.]*20))
        np.testing.assert_array_equal(rdt_out['offset_b'], np.array([rdt_out.fill_value('offset_b')] * 20))

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(20,40)
        rdt['temp'] = [20.0] * 20
        granule = rdt.to_granule()

        dataset_monitor.event.clear()

        stored_value_manager.stored_value_cas('test1',{'offset_a':20.0})
        stored_value_manager.stored_value_cas('coefficient_document',{'offset_b':10.0})
        gevent.sleep(2)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(40))
        np.testing.assert_array_almost_equal(rdt_out['temp'], np.array([20.] * 20 + [20.] * 20))
        np.testing.assert_array_equal(rdt_out['offset_b'], np.array([10.] * 40))
        np.testing.assert_array_almost_equal(rdt_out['calibrated'], np.array([30.]*20 + [40.]*20))
        np.testing.assert_array_almost_equal(rdt_out['calibrated_b'], np.array([40.] * 20 + [50.] * 20))
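
    # Illustrative sketch (not part of the original suite): the expected values above are
    # consistent with the lookup parameters acting as simple additive offsets, i.e.
    # calibrated = temp + offset_a and calibrated_b = calibrated + offset_b. The helper
    # below only reproduces that arithmetic with NumPy; the actual behaviour is defined by
    # the parameter functions in the 'lookups' parameter dictionary.
    @staticmethod
    def _expected_lookup_values(temp, offset_a, offset_b):
        import numpy as np
        temp = np.asarray(temp, dtype=float)
        calibrated = temp + offset_a            # e.g. 20.0 + 10.0 -> 30.0
        calibrated_b = calibrated + offset_b    # e.g. 30.0 + 10.0 -> 40.0
        return calibrated, calibrated_b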



    @unittest.skip("Doesn't work")
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_simplex_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()
        
        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))
        
        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()
        
        # Make sure there are no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))
    
        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()
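
    # Hedged sketch of the replay lifecycle exercised above (all calls appear in the test
    # itself; subscriber wiring and error handling are omitted). This is not a helper used
    # by the suite, just a condensed view of the define/start/pause/resume/stop sequence.
    @staticmethod
    def _replay_lifecycle_sketch(data_retriever, dataset_id, stream_id):
        # 1) Define the replay: which dataset to read and which stream to publish on
        replay_id, process_id = data_retriever.define_replay(dataset_id=dataset_id, stream_id=stream_id)
        # 2) Wrap the launched process in a client and start the agent
        replay_client = ReplayClient(process_id)
        data_retriever.start_replay_agent(replay_id)
        # 3) Drive the replay
        replay_client.start_replay()
        replay_client.pause_replay()
        replay_client.resume_replay()
        replay_client.stop_replay()
        return replay_id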


    def test_retrieve_and_transform(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10,20)

        publisher.publish(rdt.to_granule())


        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(dataset_id, 
                                             None,
                                             None, 
                                             'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                             'CTDL2SalinityTransformAlgorithm', 
                                             kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i,0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        self.publish_hifi(stream_id,route, 0)
        self.publish_hifi(stream_id,route, 1)
        

        self.wait_until_we_have_enough_granules(dataset_id,20) # I just need two


        success = False
        def verifier():
                replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(10) + 10
                if not isinstance(comp,bool):
                    return comp.all()
                return False
        success = poll(verifier)

        self.assertTrue(success)

        success = False
        def verify_points():
                replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id,5)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(15,20)
                if not isinstance(comp,bool):
                    return comp.all()
                return False
        success = poll(verify_points)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)
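
    # Hedged sketch of the poll() helper used above (the real implementation lives elsewhere
    # in the suite; the name, timeout and interval below are illustrative): call the supplied
    # callable until it returns a truthy value or the timeout expires.
    @staticmethod
    def _poll_sketch(callback, timeout=10, interval=0.5):
        import time
        import gevent
        deadline = time.time() + timeout
        while time.time() < deadline:
            if callback():
                return True
            gevent.sleep(interval)
        return False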

    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        
        stream_id, route  = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id  = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)

        dataset_monitor = DatasetMonitor(dataset_id)

        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.event.wait(30))

        query = {
            'start_time': 0 - 2208988800,
            'end_time':   20 - 2208988800,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
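        # Note (illustrative): start_time/end_time are given in unix seconds, i.e. the
        # dataset's NTP-epoch timestamps minus the 2208988800 s offset between the 1900 and
        # 1970 epochs, and stride_time=2 decimates the replay to every second sample.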
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0,20,2) == rdt['time']
        self.assertTrue(comp.all(),'%s' % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)
        

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_correct_time(self):

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e. 
        #  the conversion factor between unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now  = unix_now + 2208988800 

        unix_ago = unix_now - 20
        ntp_ago  = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago,ntp_now))
        
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue( np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue( np.abs(temporal_bounds[1] - unix_now) < 2)
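
    # Minimal sketch of the epoch conversion used above (helper names are illustrative, not
    # part of the services under test): NTP timestamps count seconds since 1900-01-01, unix
    # timestamps since 1970-01-01, and the two epochs are 2208988800 seconds apart.
    @staticmethod
    def _unix_to_ntp(unix_seconds):
        return unix_seconds + 2208988800

    @staticmethod
    def _ntp_to_unix(ntp_seconds):
        return ntp_seconds - 2208988800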


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_empty_coverage_time(self):

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
        self.assertEquals([coverage.get_parameter_context('time').fill_value] *2, temporal_bounds)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id,40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)
            
 
        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()


        self.assertTrue(event.is_set())
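
    # Hedged sketch of the caching behaviour asserted above (not the DataRetrieverService
    # implementation): entries are keyed by dataset id and stamped with their load time; a
    # lookup older than the refresh interval is reloaded, and the cache is bounded so older
    # entries are evicted as new datasets are opened. Invalidation of modified datasets is
    # not shown here.
    @staticmethod
    def _cache_sketch(refresh_interval=1, max_entries=5):
        import time
        cache = {}  # dataset_id -> (coverage, load_time)

        def get(dataset_id, load_coverage):
            now = time.time()
            hit = cache.get(dataset_id)
            if hit is None or (now - hit[1]) > refresh_interval:
                cache[dataset_id] = (load_coverage(dataset_id), now)
            if len(cache) > max_entries:
                oldest = min(cache, key=lambda k: cache[k][1])
                del cache[oldest]
            return cache[dataset_id][0]
        return get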

        
    def publish_and_wait(self, dataset_id, granule):
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream,id_only=True)
        stream_id=stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id,route)
        dataset_monitor = DatasetMonitor(dataset_id)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_thorough_gap_analysis(self):
        dataset_id = self.test_ingestion_gap_analysis()
        vcov = DatasetManagementService._get_coverage(dataset_id)

        self.assertIsInstance(vcov,ViewCoverage)
        ccov = vcov.reference_coverage

        self.assertIsInstance(ccov, ComplexCoverage)
        self.assertEquals(len(ccov._reference_covs), 3)


    def test_ingestion_gap_analysis(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        connection1 = uuid4().hex
        connection2 = uuid4().hex

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [0]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='0'))
        rdt['time'] = [1]
        rdt['temp'] = [1]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index=1))
        rdt['time'] = [2]
        rdt['temp'] = [2]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1,connection_index='3')) # Gap, missed message
        rdt['time'] = [3]
        rdt['temp'] = [3]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='3')) # Gap, new connection
        rdt['time'] = [4]
        rdt['temp'] = [4]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index='4'))
        rdt['time'] = [5]
        rdt['temp'] = [5]
        self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2,connection_index=5))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(6))
        np.testing.assert_array_equal(rdt['temp'], np.arange(6))
        return dataset_id
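
    # Hedged sketch of the gap bookkeeping the assertions above rely on (the real logic lives
    # in ingestion, not in this suite): a new segment starts whenever the connection id changes
    # or the connection index is not contiguous. Applied to the six publishes in
    # test_ingestion_gap_analysis this yields three segments, matching the three reference
    # coverages checked in test_thorough_gap_analysis.
    @staticmethod
    def _segment_by_connection(publishes):
        segments = []
        last = None  # (connection_id, connection_index)
        for conn_id, conn_index in publishes:
            conn_index = int(conn_index)
            if last is None or conn_id != last[0] or conn_index != last[1] + 1:
                segments.append([])
            segments[-1].append((conn_id, conn_index))
            last = (conn_id, conn_index)
        return segments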


    @unittest.skip('Outdated due to ingestion retry')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_ingestion_failover(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception")
        sub.start()

        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)
        
        file_path = DatasetManagementService._get_coverage_path(dataset_id)
        master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id)

        with open(master_file, 'w') as f:
            f.write('this will crash HDF')

        self.publish_hifi(stream_id, route, 5)


        self.assertTrue(event.wait(10))

        sub.stop()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_coverage_types(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        cov = DatasetManagementService._get_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, ViewCoverage)

        cov = DatasetManagementService._get_simplex_coverage(dataset_id=dataset_id)
        self.assertIsInstance(cov, SimplexCoverage)
コード例 #6
0
class RecordDictionaryIntegrationTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management  = PubsubManagementServiceClient()

        self.rdt                      = None
        self.data_producer_id         = None
        self.provider_metadata_update = None
        self.event                    = Event()

    def verify_incoming(self, m,r,s):
        rdt = RecordDictionaryTool.load_from_granule(m)
        for k,v in rdt.iteritems():
            np.testing.assert_array_equal(v, self.rdt[k])
        self.assertEquals(m.data_producer_id, self.data_producer_id)
        self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
        self.assertNotEqual(m.creation_timestamp, None)
        self.event.set()


    def test_serialize_compatability(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()
        def verifier(msg, route, stream_id):
            for k,v in msg.record_dictionary.iteritems():
                if v is not None:
                    self.assertIsInstance(v, np.ndarray)
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for k,v in rdt.iteritems():
                self.assertIsInstance(rdt[k], np.ndarray)
                self.assertIsInstance(v, np.ndarray)
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id,route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt,10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(60))


    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))


        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)


        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))

        
    def test_filter(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields,['time','temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k,v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
        


    def test_rdt_param_funcs(self):
        param_funcs = {
            'identity' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.interpolation',
                'function' : 'identity',
                'args':['x']
            },
            'ctd_tempwat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_tempwat',
                'args' : ['t0']
            },
            'ctd_preswat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_preswat',
                'args' : ["p0", "p_range_psia"]
            },
            'ctd_condwat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_condwat',
                'args' : ['c0']
            },
            'ctd_pracsal' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_pracsal',
                'args' : ['c', 't', 'p']
            },
            'ctd_density' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_density',
                'args' : ['SP','t','p','lat','lon']
            }
        }

        pfunc_ids = {}
        for name, param_def in param_funcs.iteritems():
            paramfunc = ParameterFunction(name, **param_def)
            pf_id = self.dataset_management.create_parameter_function(paramfunc)
            pfunc_ids[name] = pf_id


        params = {
            'time' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float64',
                'units' : 'seconds since 1900-01-01'
            },
            'temperature_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'pressure_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'conductivity_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'temperature' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_tempwat'],
                'parameter_function_map' : { 't0' : 'temperature_counts'},
                'value_encoding' : 'float32',
                'units' : 'deg_C'
            },
            'pressure' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_preswat'],
                'parameter_function_map' : {'p0' : 'pressure_counts', 'p_range_psia' : 679.34040721},
                'value_encoding' : 'float32',
                'units' : 'dbar'
            },
            'conductivity' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_condwat'],
                'parameter_function_map' : {'c0' : 'conductivity_counts'},
                'value_encoding' : 'float32',
                'units' : 'S m-1'
            },
            'salinity' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_pracsal'],
                'parameter_function_map' : {'c' : 'conductivity', 't' : 'temperature', 'p' : 'pressure'},
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'density' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_density'],
                'parameter_function_map' : {
                    'SP' : 'salinity',
                    't' : 'temperature',
                    'p' : 'pressure',
                    'lat' : 'lat',
                    'lon' : 'lon'
                },
                'value_encoding' : 'float32',
                'units' : 'kg m-3'
            },
            'lat' : {
                'parameter_type' : 'sparse',
                'value_encoding' : 'float32',
                'units' : 'degrees_north'
            },
            'lon' : {
                'parameter_type' : 'sparse',
                'value_encoding' : 'float32',
                'units' : 'degrees_east'
            }
        }
        param_dict = {}
        for name, param in params.iteritems():
            pcontext = ParameterContext(name, **param)
            param_id = self.dataset_management.create_parameter(pcontext)
            param_dict[name] = param_id
            
        pdict_id = self.dataset_management.create_parameter_dictionary('ctd_test', param_dict.values(), 'time')
        stream_def_id = self.pubsub_management.create_stream_definition('ctd_test', parameter_dictionary_id=pdict_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temperature_counts'] = [280000]
        rdt['conductivity_counts'] = [100000]
        rdt['pressure_counts'] = [2789]

        rdt['lat'] = [45]
        rdt['lon'] = [-71]

        np.testing.assert_allclose(rdt['density'], np.array([1001.00543606]))
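
        # The single density value above is produced entirely by the parameter functions
        # defined in this test: the raw counts are converted to temperature, conductivity and
        # pressure by the ctd_sbe37im_* functions, practical salinity is computed from
        # (c, t, p), and density from (SP, t, p, lat, lon) using the sparse lat/lon values.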

    def test_rdt_lookup(self):
        rdt = self.create_lookup_rdt()

        self.assertTrue('offset_a' in rdt.lookup_values())
        self.assertFalse('offset_b' in rdt.lookup_values())

        rdt['time'] = [0]
        rdt['temp'] = [10.0]
        rdt['offset_a'] = [2.0]
        self.assertEquals(rdt['offset_b'], None)
        self.assertEquals(rdt.lookup_values(), ['offset_a'])
        np.testing.assert_array_almost_equal(rdt['calibrated'], np.array([12.0]))

        svm = StoredValueManager(self.container)
        svm.stored_value_cas('coefficient_document', {'offset_b':2.0})
        svm.stored_value_cas("GA03FLMA-RI001-13-CTDMOG999_OFFSETC", {'offset_c':3.0})
        rdt.fetch_lookup_values()
        np.testing.assert_array_equal(rdt['offset_b'], np.array([2.0]))
        np.testing.assert_array_equal(rdt['calibrated_b'], np.array([14.0]))
        np.testing.assert_array_equal(rdt['offset_c'], np.array([3.0]))


    def create_rdt(self):
        contexts, pfuncs = self.create_pfuncs()
        context_ids = list(contexts.itervalues())

        pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='test_TIME')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
        stream_def_id = self.pubsub_management.create_stream_definition('functional', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt

    def create_lookup_rdt(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt


    def create_pfuncs(self):
        
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext(name='TIME', 
                                  parameter_type='quantity',
                                  value_encoding='float64',
                                  units='seconds since 1900-01-01')
        t_ctxt_id = self.dataset_management.create_parameter(t_ctxt)
        contexts['TIME'] = t_ctxt_id

        lat_ctxt = ParameterContext(name='LAT', 
                                    parameter_type="sparse",
                                    value_encoding='float32',
                                    units='degrees_north')
        lat_ctxt_id = self.dataset_management.create_parameter(lat_ctxt)
        contexts['LAT'] = lat_ctxt_id

        lon_ctxt = ParameterContext(name='LON', 
                                    parameter_type='sparse',
                                    value_encoding='float32',
                                    units='degrees_east')
        lon_ctxt_id = self.dataset_management.create_parameter(lon_ctxt)
        contexts['LON'] = lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(name='TEMPWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='deg_C')
        temp_ctxt_id = self.dataset_management.create_parameter(temp_ctxt)
        contexts['TEMPWAT_L0'] = temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(name='CONDWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='S m-1')
        cond_ctxt_id = self.dataset_management.create_parameter(cond_ctxt)
        contexts['CONDWAT_L0'] = cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(name='PRESWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='dbar')
        press_ctxt_id = self.dataset_management.create_parameter(press_ctxt)
        contexts['PRESWAT_L0'] = press_ctxt_id


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = ParameterFunction(name='TEMPWAT_L1',
                function_type=PFT.NUMEXPR,
                function=tl1_func,
                args=['T'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['TEMPWAT_L1'] = expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        tempL1_ctxt = ParameterContext(name='TEMPWAT_L1', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=tl1_pmap,
                value_encoding='float32',
                units='deg_C')
        tempL1_ctxt_id = self.dataset_management.create_parameter(tempL1_ctxt)
        contexts['TEMPWAT_L1'] = tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = ParameterFunction(name='CONDWAT_L1',
                function_type=PFT.NUMEXPR,
                function=cl1_func,
                args=['C'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['CONDWAT_L1'] = expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        condL1_ctxt = ParameterContext(name='CONDWAT_L1', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=cl1_pmap,
                value_encoding='float32',
                units='S m-1')
        condL1_ctxt_id = self.dataset_management.create_parameter(condL1_ctxt)
        contexts['CONDWAT_L1'] = condL1_ctxt_id

        # The equation uses p_range, a calibration coefficient, fixed here to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = ParameterFunction(name='PRESWAT_L1',function=pl1_func,function_type=PFT.NUMEXPR,args=['P','p_range'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['PRESWAT_L1'] = expr_id
        
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        presL1_ctxt = ParameterContext(name='PRESWAT_L1',
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=pl1_pmap,
                value_encoding='float32',
                units='dbar')
        presL1_ctxt_id = self.dataset_management.create_parameter(presL1_ctxt)
        contexts['PRESWAT_L1'] = presL1_ctxt_id
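
        # Worked example (illustration only): with the NUMEXPR definitions above, a raw
        # temperature count of 280000 maps to (280000 / 10000) - 10 = 18.0 deg_C and a raw
        # conductivity count of 100000 maps to (100000 / 100000) - 0.5 = 0.5 S m-1; the
        # pressure conversion additionally folds in the fixed p_range calibration coefficient.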

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = ParameterFunction(name='PRACSAL',function_type=PFT.PYTHON,function=sal_func,owner=owner,args=sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['PRACSAL'] = expr_id
        
        c10_f = ParameterFunction(name='c10', function_type=PFT.NUMEXPR, function='C*10', args=['C'])
        expr_id = self.dataset_management.create_parameter_function(c10_f)
        c10 = ParameterContext(name='c10', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map={'C':'CONDWAT_L1'},
                value_encoding='float32',
                units='1')
        c10_id = self.dataset_management.create_parameter(c10)
        contexts['c10'] = c10_id

        # PRACSAL resolves its arguments at runtime through the parameter_function_map below,
        # pulling the scaled conductivity (c10), TEMPWAT_L1 and PRESWAT_L1.
        sal_pmap = {'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_ctxt = ParameterContext(name='PRACSAL', 
                parameter_type='function',
                parameter_function_id=funcs['PRACSAL'],
                parameter_function_map=sal_pmap,
                value_encoding='float32',
                units='g kg-1')

        sal_ctxt_id = self.dataset_management.create_parameter(sal_ctxt)
        contexts['PRACSAL'] = sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = CoverageParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name='DENSITY', parameter_context=dens_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, dens_ctxt_id)
        contexts['DENSITY'] = dens_ctxt_id
        return contexts, funcs
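
    # Hedged standalone sketch of the PRACSAL/DENSITY chain defined above, written against the
    # python-gsw functions named in the comments (SP_from_C, SA_from_SP, CT_from_t, rho). It is
    # only an illustration of the calculation, not code used by the coverage model; inputs are
    # L1 values (conductivity in S m-1, temperature in deg_C, pressure in dbar).
    @staticmethod
    def _gsw_chain_sketch(condwat_l1, tempwat_l1, preswat_l1, lon, lat):
        import gsw
        pracsal = gsw.SP_from_C(condwat_l1 * 10, tempwat_l1, preswat_l1)  # S m-1 -> mS cm-1
        abs_sal = gsw.SA_from_SP(pracsal, preswat_l1, lon, lat)
        cons_temp = gsw.CT_from_t(abs_sal, tempwat_l1, preswat_l1)
        density = gsw.rho(abs_sal, cons_temp, preswat_l1)
        return pracsal, density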
コード例 #7
0
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub            = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g

    def _create_input_param_dict_for_test(self, parameter_dict_name = ''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = ''
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = ''
        pdict.add_context(pres_ctxt)

        temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        temp_ctxt.uom = ''
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = ''
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = ''
        pdict.add_context(sal_ctxt)

        # Create a temporary stream definition so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description='Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        input_param_dict = self._create_input_param_dict_for_test(parameter_dict_name = 'fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
#        input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True)
        input_stream_def_dict = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=input_param_dict)
        self.addCleanup(self.pubsub.delete_stream_definition, input_stream_def_dict)

        log.debug("Got the parsed parameter dictionary: id: %s", input_param_dict)
        log.debug("Got the stream def for parsed input: %s", input_stream_def_dict)

        # Input data product
        parsed_stream_dp_obj = IonObject(RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTDBP L0 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        input_dp_id = self.dataproduct_management.create_data_product(data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_dict
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product
        L0_stream_dp_obj = IonObject(RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTDBP L0 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(data_product=L0_stream_dp_obj,
                                                                    stream_definition_id=input_stream_def_dict
                                                                    )
        self.addCleanup(self.dataproduct_management.delete_data_product, L0_stream_dp_id)

        # The key name here must be "L0_stream": when the data process is launched, this name
        # becomes the config entry config.process.publish_streams.L0_stream.
        self.output_products = {'L0_stream' : L0_stream_dp_id}
        out_stream_ids, _ = self.resource_registry.find_objects(L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process( dprocdef_id, [input_dp_id], self.output_products)
        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()
        def subscriber(m,r,s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
            stream_ids=[output_stream_id],
            exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)


        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(stream_definition_id=input_stream_def_dict, length = 5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)


    def _check_granule_from_transform(self, granule):
        """
        An internal placeholder for checking that a granule coming out of the transform has the right properties
        """

        pass
コード例 #8
0
class TestStreamIngestionWorker(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        self.stream_id, self.route_id = self.pubsub_client.create_stream(name='parsed_stream', stream_definition_id=self.stream_def_id, exchange_point='science_data')
        self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        self.publisher = StandaloneStreamPublisher(self.stream_id, self.route_id)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_stream_ingestion_worker(self):
        self.start_ingestion_worker()

        context_ids, time_ctxt = self._create_param_contexts()
        pdict_id = self.dataset_management_client.create_parameter_dictionary(name='stream_ingestion_pdict', parameter_context_ids=context_ids, temporal_context='ingestion_timestamp')
        self.addCleanup(self.dataset_management_client.delete_parameter_dictionary, pdict_id)

        dataset_id = self.dataset_management_client.create_dataset(name='fake_dataset', description='fake_dataset', stream_id=self.stream_id, parameter_dictionary_id=pdict_id)
        self.addCleanup(self.dataset_management_client.delete_dataset, dataset_id)

        self.cov = self._create_coverage(dataset_id=dataset_id, parameter_dict_id=pdict_id, time_dom=self.time_dom, spatial_dom=self.spatial_dom)
        self.addCleanup(self.cov.close)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['conductivity'] = 1
        rdt['pressure'] = 2
        rdt['salinity'] = 3

        self.start_listener(dataset_id)

        self.publisher.publish(rdt.to_granule())
        self.data_modified = Event()
        self.data_modified.wait(30)
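        # Note (not in the original source): the wait above returns as soon as the
        # DatasetModified event fires via the subscriber started in start_listener(),
        # or falls through after the 30-second timeout if ingestion never persists
        # the published granule.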

        cov = self.get_coverage(dataset_id)
        self.assertIsNotNone(cov.get_parameter_values('raw'))

        deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

        granule = retrieve_stream(dataset_id)
        rdt_complex = RecordDictionaryTool.load_from_granule(granule)
        rdt_complex['raw'] = [deserializer.deserialize(i) for i in rdt_complex['raw']]
        for gran in rdt_complex['raw']:
            rdt_new = RecordDictionaryTool.load_from_granule(gran)
            self.assertIn(1, rdt_new['conductivity'])
            self.assertIn(2, rdt_new['pressure'])
            self.assertIn(3, rdt_new['salinity'])
            
        cov.close()


    def start_ingestion_worker(self):
        config = DotDict()
        config.process.queue_name = 'parsed_subscription'
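        # Note (not in the original source): 'parsed_subscription' matches the
        # exchange_name of the subscription created in setUp, so the spawned worker
        # consumes from that queue.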

        self.container.spawn_process(
            name='stream_ingestion_worker',
            module='ion.processes.data.ingestion.stream_ingestion_worker',
            cls='StreamIngestionWorker',
            config=config
        )

    def start_listener(self, dataset_id):
        def cb(*args, **kwargs):
            self.data_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
        es.start()

        self.addCleanup(es.stop)

    def _create_param_contexts(self):
        context_ids = []
        t_ctxt = ParameterContext('ingestion_timestamp', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.uom = 'seconds since 1900-01-01'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management_client.create_parameter_context(name='ingestion_timestamp', parameter_context=t_ctxt.dump())
        context_ids.append(t_ctxt_id)

        raw_ctxt = ParameterContext('raw', param_type=ArrayType())
        raw_ctxt.uom = ''
        context_ids.append(self.dataset_management_client.create_parameter_context(name='raw', parameter_context=raw_ctxt.dump()))

        return context_ids, t_ctxt_id

    def _create_coverage(self, dataset_id, parameter_dict_id, time_dom, spatial_dom):
        pd = self.dataset_management_client.read_parameter_dictionary(parameter_dict_id)
        pdict = ParameterDictionary.load(pd)
        sdom = GridDomain.load(spatial_dom.dump())
        tdom = GridDomain.load(time_dom.dump())
        file_root = FileSystem.get_url(FS.CACHE,'datasets')
        scov = SimplexCoverage(file_root, dataset_id, dataset_id, parameter_dictionary=pdict, temporal_domain=tdom, spatial_domain=sdom)
        return scov

    def get_coverage(self, dataset_id,mode='w'):
        file_root = FileSystem.get_url(FS.CACHE,'datasets')
        coverage = AbstractCoverage.load(file_root, dataset_id, mode=mode)
        return coverage
Code example #9
class CtdTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub            = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _create_input_param_dict_for_test(self, parameter_dict_name = ''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = ''
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = ''
        pdict.add_context(pres_ctxt)

        if parameter_dict_name == 'input_param_dict':
            temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        else:
            temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=numpy.dtype('float32')))

        temp_ctxt.uom = ''
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = ''
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = ''
        pdict.add_context(sal_ctxt)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            if parameter_dict_name == 'input_param_dict':
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)
            elif parameter_dict_name == 'output_param_dict' and pc[1].name == 'temp':
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g

    def _create_calibration_coefficients_dict(self):
        config = DotDict()
        config.process.calibration_coeffs =  {
              'temp_calibration_coeffs': {
                  'TA0' : 1.561342e-03,
                  'TA1' : 2.561486e-04,
                  'TA2' : 1.896537e-07,
                  'TA3' : 1.301189e-07,
                  'TOFFSET' : 0.000000e+00
              },

              'cond_calibration_coeffs':  {
                  'G' : -9.896568e-01,
                  'H' : 1.316599e-01,
                  'I' : -2.213854e-04,
                  'J' : 3.292199e-05,
                  'CPCOR' : -9.570000e-08,
                  'CTCOR' : 3.250000e-06,
                  'CSLOPE' : 1.000000e+00
              },

              'pres_calibration_coeffs' : {
                  'PA0' : 4.960417e-02,
                  'PA1' : 4.883682e-04,
                  'PA2' : -5.687309e-12,
                  'PTCA0' : 5.249802e+05,
                  'PTCA1' : 7.595719e+00,
                  'PTCA2' : -1.322776e-01,
                  'PTCB0' : 2.503125e+01,
                  'PTCB1' : 5.000000e-05,
                  'PTCB2' : 0.000000e+00,
                  'PTEMPA0' : -6.431504e+01,
                  'PTEMPA1' : 5.168177e+01,
                  'PTEMPA2' : -2.847757e-01,
                  'POFFSET' : 0.000000e+00
              }

          }

        return config
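    # Illustrative note (not from the original source): the DotDict built above is handed
    # to create_data_process() in test_ctd_L1_all below, so the launched
    # CTDBP_L1_Transform can read config.process.calibration_coeffs at runtime.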

    def clean_queues(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()

    def cleaning_operations(self):
        for dproc_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(dproc_id)

    def test_ctd_L1_all(self):
        """
        Test that packets are processed by the ctd_L1_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='CTDBP_L1_Transform',
            description='Take granules on the L0 stream which have the C, T and P data and separately apply algorithms and output on the L1 stream.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L1',
            class_name='CTDBP_L1_Transform')

        dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        # Get the stream definition for the stream using the parameter dictionary
        L0_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'input_param_dict')

        L0_stream_def_id = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=L0_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, L0_stream_def_id)

        L1_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'output_param_dict')
        L1_stream_def_id = self.pubsub.create_stream_definition(name='L1_out', parameter_dictionary_id=L1_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, L1_stream_def_id)


        log.debug("Got the parsed parameter dictionary: id: %s", L0_pdict_id)
        log.debug("Got the stream def for parsed input: %s", L0_stream_def_id)

        log.debug("got the stream def for the output: %s", L1_stream_def_id)

        # Input data product
        L0_stream_dp_obj = IonObject(RT.DataProduct,
            name='L0_stream',
            description='L0 stream input to CTDBP L1 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        input_dp_id = self.dataproduct_management.create_data_product(data_product=L0_stream_dp_obj,
            stream_definition_id=L0_stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product
        L1_stream_dp_obj = IonObject(RT.DataProduct,
            name='L1_stream',
            description='L1_stream output of CTDBP L1 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        L1_stream_dp_id = self.dataproduct_management.create_data_product(data_product=L1_stream_dp_obj,
            stream_definition_id=L1_stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, L1_stream_dp_id)

        # The key name here must be "L1_stream": when the data process is launched, this
        # name ends up in its configuration as config.process.publish_streams.L1_stream.
        self.output_products = {'L1_stream' : L1_stream_dp_id}
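        # Hypothetical sketch (not part of the original test): after launch, the process
        # configuration is expected to contain roughly
        #
        #     config.process.publish_streams = {'L1_stream': <id of the stream bound to L1_stream_dp_id>}
        #
        # i.e. the same stream looked up via the resource registry just below.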
        out_stream_ids, _ = self.resource_registry.find_objects(L1_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        config = self._create_calibration_coefficients_dict()
        dproc_id = self.data_process_management.create_data_process( dprocdef_id, [input_dp_id], self.output_products, config)
        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)
        log.debug("Created a data process for ctdbp_L1. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)

        input_stream_id = stream_ids[0]
        input_stream = self.resource_registry.read(input_stream_id)
        stream_route = input_stream.stream_route

        log.debug("The input stream for the L1 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()
        def subscriber(m,r,s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
            stream_ids=[output_stream_id],
            exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)


        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=L0_stream_def_id, length = 5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L1 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)


    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertIn('pressure', rdt)
        self.assertIn('temp', rdt)
        self.assertIn('conductivity', rdt)
        self.assertIn('time', rdt)
Code example #10
    def _setup_resources(self):
        # TODO: some or all of this (or some variation) should move to DAMS'

        # Build the test resources for the dataset
        dms_cli = DatasetManagementServiceClient()
        dams_cli = DataAcquisitionManagementServiceClient()
        dpms_cli = DataProductManagementServiceClient()
        rr_cli = ResourceRegistryServiceClient()
        pubsub_cli = PubsubManagementServiceClient()

        eda = ExternalDatasetAgent(name='example data agent', handler_module=self.DVR_CONFIG['dvr_mod'], handler_class=self.DVR_CONFIG['dvr_cls'])
        eda_id = dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance(name='example dataset agent instance')
        eda_inst_id = dams_cli.create_external_dataset_agent_instance(eda_inst, external_dataset_agent_id=eda_id)

        # Create and register the necessary resources/objects

        # Create DataProvider
        dprov = ExternalDataProvider(name='example data provider', institution=Institution(), contact=ContactInformation())
        dprov.contact.individual_names_given = 'Christopher Mueller'
        dprov.contact.email = '*****@*****.**'

        # Create DataSource
        dsrc = DataSource(name='example datasource', protocol_type='FILE', institution=Institution(), contact=ContactInformation())
        dsrc.connection_params['base_data_url'] = ''
        dsrc.contact.individual_names_given = 'Tim Giguere'
        dsrc.contact.email = '*****@*****.**'

        # Create ExternalDataset
        ds_name = 'ruv_test_dataset'
        dset = ExternalDataset(name=ds_name, dataset_description=DatasetDescription(), update_description=UpdateDescription(), contact=ContactInformation())

        dset.dataset_description.parameters['base_url'] = 'test_data/ruv/'
        dset.dataset_description.parameters['list_pattern'] = 'RDLi_SEAB_2011_08_24_1600.ruv'
        dset.dataset_description.parameters['date_pattern'] = '%Y %m %d %H %M'
        dset.dataset_description.parameters['date_extraction_pattern'] = 'RDLi_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv'
        dset.dataset_description.parameters['temporal_dimension'] = None
        dset.dataset_description.parameters['zonal_dimension'] = None
        dset.dataset_description.parameters['meridional_dimension'] = None
        dset.dataset_description.parameters['vertical_dimension'] = None
        dset.dataset_description.parameters['variables'] = [
        ]

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name='ruv_model')
        #dsrc_model.model = 'RUV'
        dsrc_model.data_handler_module = 'N/A'
        dsrc_model.data_handler_class = 'N/A'

        ## Run everything through DAMS
        ds_id = dams_cli.create_external_dataset(external_dataset=dset)
        ext_dprov_id = dams_cli.create_external_data_provider(external_data_provider=dprov)
        ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = dams_cli.register_external_data_set(external_dataset_id=ds_id)

        # Or using each method
        dams_cli.assign_data_source_to_external_data_provider(data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        dams_cli.assign_data_source_to_data_model(data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        dams_cli.assign_external_dataset_to_data_source(external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        dams_cli.assign_external_dataset_to_agent_instance(external_dataset_id=ds_id, agent_instance_id=eda_inst_id)

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('data', param_type=QuantityType(value_encoding=numpy.dtype('int64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1970'
        pdict.add_context(t_ctxt)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            pc_list.append(dms_cli.create_parameter_context(pc_k, pc[1].dump()))

        pdict_id = dms_cli.create_parameter_dictionary('ruv_param_dict', pc_list)

        streamdef_id = pubsub_cli.create_stream_definition(name="ruv", description="stream def for ruv testing", parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()
        tdom, sdom = tdom.dump(), sdom.dump()

        dprod = IonObject(RT.DataProduct,
            name='ruv_parsed_product',
            description='parsed ruv product',
            temporal_domain=tdom,
            spatial_domain=sdom)

        # Generate the data product and associate it to the ExternalDataset
        dproduct_id = dpms_cli.create_data_product(data_product=dprod,
            stream_definition_id=streamdef_id)

        dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id)

        stream_id, assn = rr_cli.find_objects(subject=dproduct_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        stream_id = stream_id[0]

        log.info('Created resources: {0}'.format({'ExternalDataset': ds_id, 'ExternalDataProvider': ext_dprov_id, 'DataSource': ext_dsrc_id, 'DataSourceModel': ext_dsrc_model_id, 'DataProducer': dproducer_id, 'DataProduct': dproduct_id, 'Stream': stream_id}))

        #CBM: Eventually, probably want to group this crap somehow - not sure how yet...

        # Create the logger for receiving publications
        _, stream_route, _ = self.create_stream_and_logger(name='ruv', stream_id=stream_id)

        self.EDA_RESOURCE_ID = ds_id
        self.EDA_NAME = ds_name
        self.DVR_CONFIG['dh_cfg'] = {
            'TESTING': True,
            'stream_id': stream_id,
            'stream_route': stream_route,
            'external_dataset_res': dset,
            'param_dictionary': pdict.dump(),
            'data_producer_id': dproducer_id,  # CBM: Should this be put in the main body of the config - with mod & cls?
            'max_records': 20,
            }
Code example #11
class TestStreamIngestionWorker(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        self.stream_id, self.route_id = self.pubsub_client.create_stream(
            name='parsed_stream',
            stream_definition_id=self.stream_def_id,
            exchange_point='science_data')
        self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        self.publisher = StandaloneStreamPublisher(self.stream_id,
                                                   self.route_id)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_stream_ingestion_worker(self):
        self.start_ingestion_worker()

        context_ids, time_ctxt = self._create_param_contexts()
        pdict_id = self.dataset_management_client.create_parameter_dictionary(
            name='stream_ingestion_pdict',
            parameter_context_ids=context_ids,
            temporal_context='ingestion_timestamp')
        self.addCleanup(
            self.dataset_management_client.delete_parameter_dictionary,
            pdict_id)

        dataset_id = self.dataset_management_client.create_dataset(
            name='fake_dataset',
            description='fake_dataset',
            stream_id=self.stream_id,
            spatial_domain=self.spatial_dom.dump(),
            temporal_domain=self.time_dom.dump(),
            parameter_dictionary_id=pdict_id)
        self.addCleanup(self.dataset_management_client.delete_dataset,
                        dataset_id)

        self.cov = self._create_coverage(dataset_id=dataset_id,
                                         parameter_dict_id=pdict_id,
                                         time_dom=self.time_dom,
                                         spatial_dom=self.spatial_dom)
        self.addCleanup(self.cov.close)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['conductivity'] = 1
        rdt['pressure'] = 2
        rdt['salinity'] = 3

        self.start_listener(dataset_id)

        self.publisher.publish(rdt.to_granule())
        self.data_modified = Event()
        self.data_modified.wait(30)

        cov = self.get_coverage(dataset_id)
        self.assertIsNotNone(cov.get_parameter_values('raw'))

        deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

        granule = retrieve_stream(dataset_id)
        rdt_complex = RecordDictionaryTool.load_from_granule(granule)
        rdt_complex['raw'] = [
            deserializer.deserialize(i) for i in rdt_complex['raw']
        ]
        for gran in rdt_complex['raw']:
            rdt_new = RecordDictionaryTool.load_from_granule(gran)
            self.assertIn(1, rdt_new['conductivity'])
            self.assertIn(2, rdt_new['pressure'])
            self.assertIn(3, rdt_new['salinity'])

        cov.close()

    def start_ingestion_worker(self):
        config = DotDict()
        config.process.queue_name = 'parsed_subscription'

        self.container.spawn_process(
            name='stream_ingestion_worker',
            module='ion.processes.data.ingestion.stream_ingestion_worker',
            cls='StreamIngestionWorker',
            config=config)

    def start_listener(self, dataset_id):
        def cb(*args, **kwargs):
            self.data_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id)
        es.start()

        self.addCleanup(es.stop)

    def _create_param_contexts(self):
        context_ids = []
        t_ctxt = ParameterContext(
            'ingestion_timestamp',
            param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.uom = 'seconds since 1900-01-01'
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management_client.create_parameter_context(
            name='ingestion_timestamp', parameter_context=t_ctxt.dump())
        context_ids.append(t_ctxt_id)

        raw_ctxt = ParameterContext('raw', param_type=ArrayType())
        raw_ctxt.uom = ''
        context_ids.append(
            self.dataset_management_client.create_parameter_context(
                name='raw', parameter_context=raw_ctxt.dump()))

        return context_ids, t_ctxt_id

    def _create_coverage(self, dataset_id, parameter_dict_id, time_dom,
                         spatial_dom):
        pd = self.dataset_management_client.read_parameter_dictionary(
            parameter_dict_id)
        pdict = ParameterDictionary.load(pd)
        sdom = GridDomain.load(spatial_dom.dump())
        tdom = GridDomain.load(time_dom.dump())
        file_root = FileSystem.get_url(FS.CACHE, 'datasets')
        scov = SimplexCoverage(file_root,
                               dataset_id,
                               dataset_id,
                               parameter_dictionary=pdict,
                               temporal_domain=tdom,
                               spatial_domain=sdom)
        return scov

    def get_coverage(self, dataset_id, mode='w'):
        file_root = FileSystem.get_url(FS.CACHE, 'datasets')
        coverage = AbstractCoverage.load(file_root, dataset_id, mode=mode)
        return coverage
Code example #12
    def _setup_resources(self):
        # TODO: some or all of this (or some variation) should move to DAMS'

        # Build the test resources for the dataset
        dms_cli = DatasetManagementServiceClient()
        dams_cli = DataAcquisitionManagementServiceClient()
        dpms_cli = DataProductManagementServiceClient()
        rr_cli = ResourceRegistryServiceClient()
        pubsub_cli = PubsubManagementServiceClient()

        eda = ExternalDatasetAgent(handler_module=self.DVR_CONFIG['dvr_mod'], handler_class=self.DVR_CONFIG['dvr_cls'])
        eda_id = dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance()
        eda_inst_id = dams_cli.create_external_dataset_agent_instance(eda_inst, external_dataset_agent_id=eda_id)

        # Create and register the necessary resources/objects

        # Create DataProvider
        dprov = ExternalDataProvider(institution=Institution(), contact=ContactInformation())
        dprov.contact.individual_names_given = 'Christopher Mueller'
        dprov.contact.email = '*****@*****.**'

        # Create DataSource
        dsrc = DataSource(protocol_type='DAP', institution=Institution(), contact=ContactInformation())
        dsrc.connection_params['base_data_url'] = ''
        dsrc.contact.individual_names_given = 'Tim Giguere'
        dsrc.contact.email = '*****@*****.**'

        # Create ExternalDataset
        ds_name = 'dummy_dataset'
        dset = ExternalDataset(name=ds_name, dataset_description=DatasetDescription(), update_description=UpdateDescription(), contact=ContactInformation())

        # The usgs.nc test dataset is a download of the R1 dataset found here:
        # http://thredds-test.oceanobservatories.org/thredds/dodsC/ooiciData/E66B1A74-A684-454A-9ADE-8388C2C634E5.ncml
        dset.dataset_description.parameters['base_url'] = 'test_data/dummy'
        dset.dataset_description.parameters['list_pattern'] = 'test*.dum'
        dset.dataset_description.parameters['date_pattern'] = '%Y %m %d %H'
        dset.dataset_description.parameters['date_extraction_pattern'] = 'test([\d]{4})-([\d]{2})-([\d]{2})-([\d]{2}).dum'
        dset.dataset_description.parameters['temporal_dimension'] = 'time'
        dset.dataset_description.parameters['zonal_dimension'] = 'lon'
        dset.dataset_description.parameters['meridional_dimension'] = 'lat'
        dset.dataset_description.parameters['variables'] = [
            'dummy',
            ]

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name='dap_model')
        #dsrc_model.model = 'DAP'
        dsrc_model.data_handler_module = 'N/A'
        dsrc_model.data_handler_class = 'N/A'

        ## Run everything through DAMS
        ds_id = dams_cli.create_external_dataset(external_dataset=dset)
        ext_dprov_id = dams_cli.create_external_data_provider(external_data_provider=dprov)
        ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = dams_cli.register_external_data_set(external_dataset_id=ds_id)

        # Or using each method
        dams_cli.assign_data_source_to_external_data_provider(data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        dams_cli.assign_data_source_to_data_model(data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        dams_cli.assign_external_dataset_to_data_source(external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        dams_cli.assign_external_dataset_to_agent_instance(external_dataset_id=ds_id, agent_instance_id=eda_inst_id)

        #create temp streamdef so the data product can create the stream
        pc_list = []

        #Get 'time' parameter context
        pc_list.append(dms_cli.read_parameter_context_by_name('time', id_only=True))
        pc_list.append(dms_cli.read_parameter_context_by_name('dummy', id_only=True))

        for pc_k, pc in self._create_parameter_dictionary().iteritems():
            pc_list.append(dms_cli.create_parameter_context(pc_k, pc[1].dump()))

        pdict_id = dms_cli.create_parameter_dictionary('dummy_param_dict', pc_list)

        streamdef_id = pubsub_cli.create_stream_definition(name="dummy", parameter_dictionary_id=pdict_id, description="dummy")

        tdom, sdom = time_series_domain()
        tdom, sdom = tdom.dump(), sdom.dump()

        dprod = DataProduct(
            name='dummy_dataset',
            description='dummy data product',
            temporal_domain=tdom,
            spatial_domain=sdom)

        dproduct_id = dpms_cli.create_data_product(data_product=dprod,
            stream_definition_id=streamdef_id)

        dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id)  # , create_stream=True)

        stream_id, assn = rr_cli.find_objects(subject=dproduct_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        stream_id = stream_id[0]

        log.info('Created resources: {0}'.format({'ExternalDataset': ds_id, 'ExternalDataProvider': ext_dprov_id, 'DataSource': ext_dsrc_id, 'DataSourceModel': ext_dsrc_model_id, 'DataProducer': dproducer_id, 'DataProduct': dproduct_id, 'Stream': stream_id}))

        # Create the logger for receiving publications
        _, stream_route, stream_def = self.create_stream_and_logger(name='dummy', stream_id=stream_id)

        self.EDA_RESOURCE_ID = ds_id
        self.EDA_NAME = ds_name
        self.DVR_CONFIG['dh_cfg'] = {
            'TESTING': True,
            'stream_id': stream_id,
            'stream_route': stream_route,
            'stream_def': stream_def,
            'data_producer_id': dproducer_id,  # CBM: Should this be put in the main body of the config - with mod & cls?
            'max_records': 4,
            }

        folder = 'test_data/dummy'

        if os.path.isdir(folder):
            for the_file in os.listdir(folder):
                file_path = os.path.join(folder, the_file)
                try:
                    if os.path.isfile(file_path):
                        os.unlink(file_path)
                except Exception as e:
                    log.debug('_setup_resources error: {0}'.format(e))

        if not os.path.exists(folder):
            os.makedirs(folder)

        self.add_dummy_file('test_data/dummy/test2012-02-01-12.dum')
        self.add_dummy_file('test_data/dummy/test2012-02-01-13.dum')
        self.add_dummy_file('test_data/dummy/test2012-02-01-14.dum')
        self.add_dummy_file('test_data/dummy/test2012-02-01-15.dum')
Code example #13
class DatasetManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        tdom, sdom = time_series_domain()
        dataset_id = self.dataset_management.create_dataset(
            name='ctd_dataset',
            parameter_dictionary_id=pdict_id,
            spatial_domain=sdom.dump(),
            temporal_domain=tdom.dump())

        ds_obj = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, 'ctd_dataset')

        ds_obj.name = 'something different'
        self.dataset_management.update_dataset(ds_obj)
        self.dataset_management.register_dataset(dataset_id)
        ds_obj2 = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, ds_obj2.name)
        self.assertTrue(ds_obj2.registered)

    def test_context_crud(self):
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        context = DatasetManagementService.get_parameter_context(context_id)
        self.assertIsInstance(context, ParameterContext)
        self.assertEquals(context.identifier, context_id)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)

    def test_pfunc_crud(self):
        contexts, funcs = self.create_pfuncs()
        context_ids = [
            context_id for ctxt, context_id in contexts.itervalues()
        ]

        pdict_id = self.dataset_management.create_parameter_dictionary(
            name='functional_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        expr, expr_id = funcs['CONDWAT_L1']
        func_class = DatasetManagementService.get_parameter_function(expr_id)
        self.assertIsInstance(func_class, NumexprFunction)

    def test_pdict_crud(self):
        context_ids = self.create_contexts()
        pdict_res_id = self.dataset_management.create_parameter_dictionary(
            name='pdict1',
            parameter_context_ids=context_ids,
            temporal_context='time')

        pdict_contexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=pdict_res_id, id_only=True)

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_res_id)
        self.assertIsInstance(pdict, ParameterDictionary)
        self.assertTrue('time_test' in pdict)
        self.assertEquals(pdict.identifier, pdict_res_id)

        self.assertEquals(set(pdict_contexts), set(context_ids))

        self.dataset_management.delete_parameter_dictionary(
            parameter_dictionary_id=pdict_res_id)
        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_dictionary(
                parameter_dictionary_id=pdict_res_id)

    def create_contexts(self):
        context_ids = []
        cond_ctxt = ParameterContext(
            'conductivity_test',
            param_type=QuantityType(value_encoding=np.float32))
        cond_ctxt.uom = 'unknown'
        cond_ctxt.fill_value = 0e0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='conductivity_test', parameter_context=cond_ctxt.dump()))

        pres_ctxt = ParameterContext(
            'pressure_test',
            param_type=QuantityType(value_encoding=np.float32))
        pres_ctxt.uom = 'Pascal'
        pres_ctxt.fill_value = 0x0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='pressure_test', parameter_context=pres_ctxt.dump()))

        sal_ctxt = ParameterContext(
            'salinity_test',
            param_type=QuantityType(value_encoding=np.float32))
        sal_ctxt.uom = 'PSU'
        sal_ctxt.fill_value = 0x0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='salinity_test', parameter_context=sal_ctxt.dump()))

        temp_ctxt = ParameterContext(
            'temp_test', param_type=QuantityType(value_encoding=np.float32))
        temp_ctxt.uom = 'degree_Celsius'
        temp_ctxt.fill_value = 0e0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='temp_test', parameter_context=temp_ctxt.dump()))

        t_ctxt = ParameterContext(
            'time_test', param_type=QuantityType(value_encoding=np.int64))
        t_ctxt.uom = 'seconds since 1970-01-01'
        t_ctxt.fill_value = 0x0
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name='time_test', parameter_context=t_ctxt.dump()))

        return context_ids

    def create_pfuncs(self):
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext(
            'TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TIME', parameter_context=t_ctxt.dump())
        contexts['TIME'] = (t_ctxt, t_ctxt_id)

        lat_ctxt = ParameterContext(
            'LAT',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_LAT', parameter_context=lat_ctxt.dump())
        contexts['LAT'] = lat_ctxt, lat_ctxt_id

        lon_ctxt = ParameterContext(
            'LON',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_LON', parameter_context=lon_ctxt.dump())
        contexts['LON'] = lon_ctxt, lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(
            'TEMPWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TEMPWAT_L0', parameter_context=temp_ctxt.dump())
        contexts['TEMPWAT_L0'] = temp_ctxt, temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(
            'CONDWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_CONDWAT_L0', parameter_context=cond_ctxt.dump())
        contexts['CONDWAT_L0'] = cond_ctxt, cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(
            'PRESWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRESWAT_L0', parameter_context=press_ctxt.dump())
        contexts['PRESWAT_L0'] = press_ctxt, press_ctxt_id

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
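        # Worked example (illustrative, not in the original source): a raw TEMPWAT_L0
        # count of 280000 maps to (280000 / 10000) - 10 = 18.0 deg_C.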
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_TEMPWAT_L1', parameter_function=expr.dump())
        funcs['TEMPWAT_L1'] = expr, expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr.param_map = tl1_pmap
        tempL1_ctxt = ParameterContext(
            'TEMPWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TEMPWAT_L1',
            parameter_context=tempL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['TEMPWAT_L1'] = tempL1_ctxt, tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
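        # Worked example (illustrative, not in the original source): a raw CONDWAT_L0
        # count of 100000 maps to (100000 / 100000) - 0.5 = 0.5 S m-1.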
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_CONDWAT_L1', parameter_function=expr.dump())
        funcs['CONDWAT_L1'] = expr, expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr.param_map = cl1_pmap
        condL1_ctxt = ParameterContext(
            'CONDWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_CONDWAT_L1',
            parameter_context=condL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['CONDWAT_L1'] = condL1_ctxt, condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_PRESWAT_L1', parameter_function=expr.dump())
        funcs['PRESWAT_L1'] = expr, expr_id

        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr.param_map = pl1_pmap
        presL1_ctxt = ParameterContext(
            'PRESWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRESWAT_L1',
            parameter_context=presL1_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['PRESWAT_L1'] = presL1_ctxt, presL1_ctxt_id

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(
            name='test_PRACSAL', parameter_function=expr.dump())
        funcs['PRACSAL'] = expr, expr_id

        # The 'C' argument is itself a NumexprFunction, evaluated at runtime to scale
        # CONDWAT_L1 by a factor of 10 before it is passed to gsw.SP_from_C.
        sal_pmap = {
            'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'],
                                 param_map={'C': 'CONDWAT_L1'}),
            't': 'TEMPWAT_L1',
            'p': 'PRESWAT_L1'
        }
        expr.param_map = sal_pmap
        sal_ctxt = ParameterContext('PRACSAL',
                                    param_type=ParameterFunctionType(expr),
                                    variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRACSAL',
            parameter_context=sal_ctxt.dump(),
            parameter_function_id=expr_id)
        contexts['PRACSAL'] = sal_ctxt, sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP',
                                      ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction(
            'cons_temp', owner, 'CT_from_t',
            [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction(
            'DENSITY', owner, 'rho',
            [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext(
            'DENSITY',
            param_type=ParameterFunctionType(dens_expr),
            variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_DENSITY', parameter_context=dens_ctxt.dump())
        contexts['DENSITY'] = dens_ctxt, dens_ctxt_id
        return contexts, funcs

    def test_verify_contexts(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)
        pcontexts = self.dataset_management.read_parameter_contexts(
            parameter_dictionary_id=pdict_id)
        for pcontext in pcontexts:
            self.assertTrue('fill_value' in pcontext)
            self.assertTrue('reference_urls' in pcontext)
            self.assertTrue('internal_name' in pcontext)
            self.assertTrue('display_name' in pcontext)
            self.assertTrue('standard_name' in pcontext)
            self.assertTrue('ooi_short_name' in pcontext)
            self.assertTrue('description' in pcontext)
            self.assertTrue('precision' in pcontext)
Code example #14
class RecordDictionaryIntegrationTest(IonIntegrationTestCase):
    xps = []
    xns = []

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()

        self.rdt = None
        self.data_producer_id = None
        self.provider_metadata_update = None
        self.event = Event()

    def tearDown(self):
        for xn in self.xns:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.xps:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def verify_incoming(self, m, r, s):
        rdt = RecordDictionaryTool.load_from_granule(m)
        self.assertEquals(rdt, self.rdt)
        self.assertEquals(m.data_producer_id, self.data_producer_id)
        self.assertEquals(m.provider_metadata_update,
                          self.provider_metadata_update)
        self.assertNotEqual(m.creation_timestamp, None)
        self.event.set()

    def test_granule(self):

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd', parameter_dictionary_id=pdict_id)
        pdict = DatasetManagementService.get_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        self.xps.append('xp1')
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()

        subscription_id = self.pubsub_management.create_subscription(
            'sub', stream_ids=[stream_id])
        self.xns.append('sub')
        self.pubsub_management.activate_subscription(subscription_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name,
                          rdt.temporal_parameter)

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1: 1}

        publisher.publish(
            rdt.to_granule(data_producer_id='data_producer',
                           provider_metadata_update={1: 1}))

        self.assertTrue(self.event.wait(10))

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)

        filtered_stream_def_id = self.pubsub_management.create_stream_definition(
            'filtered',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields, ['time', 'temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k, v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None, None, None])
        self.assertTrue(rdt['time'] is None)

        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))

    def test_rdt_param_funcs(self):
        rdt = self.create_rdt()
        rdt['TIME'] = [0]
        rdt['TEMPWAT_L0'] = [280000]
        rdt['CONDWAT_L0'] = [100000]
        rdt['PRESWAT_L0'] = [2789]

        rdt['LAT'] = [45]
        rdt['LON'] = [-71]
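        # Explanatory note (not in the original source): reading rdt['DENSITY'] below
        # triggers the parameter-function chain built in create_pfuncs(): the L0 counts
        # are converted to L1 values by the numexpr expressions, PRACSAL is computed
        # via gsw.SP_from_C, and DENSITY via gsw.SA_from_SP, CT_from_t and rho.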

        np.testing.assert_array_almost_equal(rdt['DENSITY'],
                                             np.array([1001.76506258]))

    def create_rdt(self):
        contexts, pfuncs = self.create_pfuncs()
        context_ids = [_id for ct, _id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(
            name='functional_pdict',
            parameter_context_ids=context_ids,
            temporal_context='test_TIME')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'functional', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt

    def create_pfuncs(self):

        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext(
            'TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TIME', parameter_context=t_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        t_ctxt_id)
        contexts['TIME'] = (t_ctxt, t_ctxt_id)

        lat_ctxt = ParameterContext(
            'LAT',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_LAT', parameter_context=lat_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        lat_ctxt_id)
        contexts['LAT'] = lat_ctxt, lat_ctxt_id

        lon_ctxt = ParameterContext(
            'LON',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_LON', parameter_context=lon_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        lon_ctxt_id)
        contexts['LON'] = lon_ctxt, lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(
            'TEMPWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TEMPWAT_L0', parameter_context=temp_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        temp_ctxt_id)
        contexts['TEMPWAT_L0'] = temp_ctxt, temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(
            'CONDWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_CONDWAT_L0', parameter_context=cond_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        cond_ctxt_id)
        contexts['CONDWAT_L0'] = cond_ctxt, cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(
            'PRESWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRESWAT_L0', parameter_context=press_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        press_ctxt_id)
        contexts['PRESWAT_L0'] = press_ctxt, press_ctxt_id

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_TEMPWAT_L1', parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function,
                        expr_id)
        funcs['TEMPWAT_L1'] = expr, expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr.param_map = tl1_pmap
        tempL1_ctxt = ParameterContext(
            'TEMPWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_TEMPWAT_L1',
            parameter_context=tempL1_ctxt.dump(),
            parameter_function_ids=[expr_id])
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        tempL1_ctxt_id)
        contexts['TEMPWAT_L1'] = tempL1_ctxt, tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_CONDWAT_L1', parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function,
                        expr_id)
        funcs['CONDWAT_L1'] = expr, expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr.param_map = cl1_pmap
        condL1_ctxt = ParameterContext(
            'CONDWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_CONDWAT_L1',
            parameter_context=condL1_ctxt.dump(),
            parameter_function_ids=[expr_id])
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        condL1_ctxt_id)
        contexts['CONDWAT_L1'] = condL1_ctxt, condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'])
        expr_id = self.dataset_management.create_parameter_function(
            name='test_PRESWAT_L1', parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function,
                        expr_id)
        funcs['PRESWAT_L1'] = expr, expr_id

        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr.param_map = pl1_pmap
        presL1_ctxt = ParameterContext(
            'PRESWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRESWAT_L1',
            parameter_context=presL1_ctxt.dump(),
            parameter_function_ids=[expr_id])
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        presL1_ctxt_id)
        contexts['PRESWAT_L1'] = presL1_ctxt, presL1_ctxt_id

        # Density & practical salinity calucluated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(
            name='test_PRACSAL', parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function,
                        expr_id)
        funcs['PRACSAL'] = expr, expr_id

        # A magic function that may or may not exist actually forms the line below at runtime.
        sal_pmap = {
            'C':
            NumexprFunction('CONDWAT_L1*10',
                            'C*10', ['C'],
                            param_map={'C': 'CONDWAT_L1'}),
            't':
            'TEMPWAT_L1',
            'p':
            'PRESWAT_L1'
        }
        expr.param_map = sal_pmap
        sal_ctxt = ParameterContext('PRACSAL',
                                    param_type=ParameterFunctionType(expr),
                                    variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_PRACSAL',
            parameter_context=sal_ctxt.dump(),
            parameter_function_ids=[expr_id])
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        sal_ctxt_id)
        contexts['PRACSAL'] = sal_ctxt, sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP',
                                      ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction(
            'cons_temp', owner, 'CT_from_t',
            [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction(
            'DENSITY', owner, 'rho',
            [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext(
            'DENSITY',
            param_type=ParameterFunctionType(dens_expr),
            variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name='test_DENSITY', parameter_context=dens_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context,
                        dens_ctxt_id)
        contexts['DENSITY'] = dens_ctxt, dens_ctxt_id
        return contexts, funcs
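
The dependent parameters registered above are plain numexpr expressions, so the L0-to-L1 conversions can be sanity-checked outside the ION services. A minimal standalone sketch, assuming only numpy and numexpr are installed and reusing the sample L0 values that test_rdt_param_funcs feeds in further below:

import numpy as np
import numexpr

# Sample L0 inputs (the values used by test_rdt_param_funcs below)
TEMPWAT_L0 = np.array([280000.0], dtype='float32')
CONDWAT_L0 = np.array([100000.0], dtype='float32')
PRESWAT_L0 = np.array([2789.0], dtype='float32')
p_range = 679.34040721  # fixed calibration coefficient, per the comment above

# TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
TEMPWAT_L1 = numexpr.evaluate('(T / 10000) - 10', local_dict={'T': TEMPWAT_L0})
# CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
CONDWAT_L1 = numexpr.evaluate('(C / 100000) - 0.5', local_dict={'C': CONDWAT_L0})
# PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
PRESWAT_L1 = numexpr.evaluate('(P * p_range / (0.85 * 65536)) - (0.05 * p_range)',
                              local_dict={'P': PRESWAT_L0, 'p_range': p_range})

print(TEMPWAT_L1)  # [ 18.]
print(CONDWAT_L1)  # [ 0.5]
print(PRESWAT_L1)  # approximately [ 0.0454] dbar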
Code example #15
class RecordDictionaryIntegrationTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management  = PubsubManagementServiceClient()

        self.rdt                      = None
        self.data_producer_id         = None
        self.provider_metadata_update = None
        self.event                    = Event()

    def verify_incoming(self, m,r,s):
        rdt = RecordDictionaryTool.load_from_granule(m)
        self.assertEquals(rdt, self.rdt)
        self.assertEquals(m.data_producer_id, self.data_producer_id)
        self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
        self.assertNotEqual(m.creation_timestamp, None)
        self.event.set()


    def test_serialize_compatability(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()
        def verifier(msg, route, stream_id):
            for k,v in msg.record_dictionary.iteritems():
                if v is not None:
                    self.assertIsInstance(v, np.ndarray)
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for field in rdt.fields:
                self.assertIsInstance(rdt[field], np.ndarray)
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id,route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt,10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))


    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))


        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)


        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))

        
    def test_filter(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields,['time','temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule(connection_id='c1', connection_index='0')
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        self.assertEquals(rdt2.connection_id,'c1')
        self.assertEquals(rdt2.connection_index,'0')
        for k,v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
        


    def test_rdt_param_funcs(self):
        rdt = self.create_rdt()
        rdt['TIME'] = [0]
        rdt['TEMPWAT_L0'] = [280000]
        rdt['CONDWAT_L0'] = [100000]
        rdt['PRESWAT_L0'] = [2789]

        rdt['LAT'] = [45]
        rdt['LON'] = [-71]

        np.testing.assert_array_almost_equal(rdt['DENSITY'], np.array([1001.76506258], dtype='float32'))

    def test_rdt_lookup(self):
        rdt = self.create_lookup_rdt()

        self.assertTrue('offset_a' in rdt.lookup_values())
        self.assertFalse('offset_b' in rdt.lookup_values())

        rdt['time'] = [0]
        rdt['temp'] = [10.0]
        rdt['offset_a'] = [2.0]
        self.assertEquals(rdt['offset_b'], None)
        self.assertEquals(rdt.lookup_values(), ['offset_a'])
        np.testing.assert_array_almost_equal(rdt['calibrated'], np.array([12.0]))

        svm = StoredValueManager(self.container)
        svm.stored_value_cas('coefficient_document', {'offset_b':2.0})
        svm.stored_value_cas("GA03FLMA-RI001-13-CTDMOG999_OFFSETC", {'offset_c':3.0})
        rdt.fetch_lookup_values()
        np.testing.assert_array_equal(rdt['offset_b'], np.array([2.0]))
        np.testing.assert_array_equal(rdt['calibrated_b'], np.array([14.0]))
        np.testing.assert_array_equal(rdt['offset_c'], np.array([3.0]))


    def create_rdt(self):
        contexts, pfuncs = self.create_pfuncs()
        context_ids = [_id for ct,_id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='test_TIME')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
        stream_def_id = self.pubsub_management.create_stream_definition('functional', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt

    def create_lookup_rdt(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt


    def create_pfuncs(self):
        
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 1900-01-01'
        t_ctxt_id = self.dataset_management.create_parameter_context(name='test_TIME', parameter_context=t_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, t_ctxt_id)
        contexts['TIME'] = (t_ctxt, t_ctxt_id)

        lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(name='test_LAT', parameter_context=lat_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, lat_ctxt_id)
        contexts['LAT'] = lat_ctxt, lat_ctxt_id

        lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(name='test_LON', parameter_context=lon_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, lon_ctxt_id)
        contexts['LON'] = lon_ctxt, lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(name='test_TEMPWAT_L0', parameter_context=temp_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, temp_ctxt_id)
        contexts['TEMPWAT_L0'] = temp_ctxt, temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(name='test_CONDWAT_L0', parameter_context=cond_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, cond_ctxt_id)
        contexts['CONDWAT_L0'] = cond_ctxt, cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(name='test_PRESWAT_L0', parameter_context=press_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, press_ctxt_id)
        contexts['PRESWAT_L0'] = press_ctxt, press_ctxt_id


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'])
        expr_id = self.dataset_management.create_parameter_function(name='test_TEMPWAT_L1', parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function, expr_id)
        funcs['TEMPWAT_L1'] = expr, expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr.param_map = tl1_pmap
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(name='test_TEMPWAT_L1', parameter_context=tempL1_ctxt.dump(), parameter_function_id=expr_id)
        self.addCleanup(self.dataset_management.delete_parameter_context, tempL1_ctxt_id)
        contexts['TEMPWAT_L1'] = tempL1_ctxt, tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'])
        expr_id = self.dataset_management.create_parameter_function(name='test_CONDWAT_L1', parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function, expr_id)
        funcs['CONDWAT_L1'] = expr, expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr.param_map = cl1_pmap
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(name='test_CONDWAT_L1', parameter_context=condL1_ctxt.dump(), parameter_function_id=expr_id)
        self.addCleanup(self.dataset_management.delete_parameter_context, condL1_ctxt_id)
        contexts['CONDWAT_L1'] = condL1_ctxt, condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'])
        expr_id = self.dataset_management.create_parameter_function(name='test_PRESWAT_L1', parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function, expr_id)
        funcs['PRESWAT_L1'] = expr, expr_id
        
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr.param_map = pl1_pmap
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(name='test_PRESWAT_L1', parameter_context=presL1_ctxt.dump(), parameter_function_id=expr_id)
        self.addCleanup(self.dataset_management.delete_parameter_context, presL1_ctxt_id)
        contexts['PRESWAT_L1'] = presL1_ctxt, presL1_ctxt_id

        # Density & practical salinity calucluated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(name='test_PRACSAL', parameter_function=expr.dump())
        self.addCleanup(self.dataset_management.delete_parameter_function, expr_id)
        funcs['PRACSAL'] = expr, expr_id
        
        # A magic function that may or may not exist actually forms the line below at runtime.
        sal_pmap = {'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        expr.param_map = sal_pmap
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(name='test_PRACSAL', parameter_context=sal_ctxt.dump(), parameter_function_id=expr_id)
        self.addCleanup(self.dataset_management.delete_parameter_context, sal_ctxt_id)
        contexts['PRACSAL'] = sal_ctxt, sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name='test_DENSITY', parameter_context=dens_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, dens_ctxt_id)
        contexts['DENSITY'] = dens_ctxt, dens_ctxt_id
        return contexts, funcs
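
The PRACSAL and DENSITY parameters above delegate to the Gibbs Seawater library. For reference, a minimal sketch of the same chain evaluated directly, assuming the python-gsw package linked in the comments and the L1 values that follow from the sample inputs in test_rdt_param_funcs:

import gsw

# L1 values derived from the sample L0 inputs in test_rdt_param_funcs
TEMPWAT_L1 = 18.0      # deg_C
CONDWAT_L1 = 0.5       # S m-1
PRESWAT_L1 = 0.0454    # dbar (approximate)
LAT, LON = 45.0, -71.0

# PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
PRACSAL = gsw.SP_from_C(CONDWAT_L1 * 10, TEMPWAT_L1, PRESWAT_L1)  # conductivity passed in mS/cm, hence the *10
abs_sal = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, LON, LAT)
cons_temp = gsw.CT_from_t(abs_sal, TEMPWAT_L1, PRESWAT_L1)
DENSITY = gsw.rho(abs_sal, cons_temp, PRESWAT_L1)
print(DENSITY)  # expected to land near the 1001.76506258 kg m-3 asserted in test_rdt_param_funcs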
Code example #16
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self): # Love the non pep-8 convention
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher   = ProcessDispatcherServiceClient()
        self.pubsub_management    = PubsubManagementServiceClient()
        self.resource_registry    = ResourceRegistryServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever       = DataRetrieverServiceClient()
        self.pids                 = []
        self.event                = Event()
        self.exchange_space_name  = 'test_granules'
        self.exchange_point_name  = 'science_data'       

        self.purge_queues()
        self.queue_buffer         = []

    def purge_queues(self):
        xn = self.container.ex_manager.create_xn_queue('science_granule_ingestion')
        xn.purge()
        

    def tearDown(self):
        self.purge_queues()
        for pid in self.pids:
            self.container.proc_manager.terminate_process(pid)
        IngestionManagementIntTest.clean_subscriptions()
        for queue in self.queue_buffer:
            if isinstance(queue, ExchangeNameQueue):
                queue.delete()
            elif isinstance(queue, str):
                xn = self.container.ex_manager.create_xn_queue(queue)
                xn.delete()

        

    def launch_producer(self, stream_id=''):
        #--------------------------------------------------------------------------------
        # Launch the producer
        #--------------------------------------------------------------------------------

        pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}})

        self.pids.append(pid)

    def get_ingestion_config(self):
        #--------------------------------------------------------------------------------
        # Grab the ingestion configuration from the resource registry
        #--------------------------------------------------------------------------------
        # The ingestion configuration should have been created by the bootstrap service 
        # which is configured through r2deploy.yml

        ingest_configs, _  = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True)
        return ingest_configs[0]


    def publish_hifi(self,stream_id,stream_route,offset=0):
        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self,stream_id, route):

        for i in xrange(4):
            self.publish_hifi(stream_id,route,i)
        

    def get_datastore(self, dataset_id):
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    def validate_granule_subscription(self, msg, route, stream_id):
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
        self.event.set()

    def make_file_data(self):
        from interface.objects import File
        import uuid
        data = 'hello world\n'
        rand = str(uuid.uuid4())[:8]
        meta = File(name='/examples/' + rand + '.txt', group_id='example1')
        return {'body': data, 'meta':meta}

    def publish_file(self, stream_id, stream_route):
        publisher = StandaloneStreamPublisher(stream_id,stream_route)
        publisher.publish(self.make_file_data())
        
    def wait_until_we_have_enough_granules(self, dataset_id='',granules=4):
        datastore = self.get_datastore(dataset_id)
        dataset = self.dataset_management.read_dataset(dataset_id)
        
        with gevent.timeout.Timeout(40):
            success = False
            while not success:
                success = len(datastore.query_view(dataset.view_name)) >= granules
                gevent.sleep(0.1)

        log.info(datastore.query_view(dataset.view_name))




    def wait_until_we_have_enough_files(self):
        datastore = self.container.datastore_manager.get_datastore('filesystem', DataStore.DS_PROFILE.FILESYSTEM)

        now = time.time()
        timeout = now + 10
        done = False
        while not done:
            if now >= timeout:
                raise Timeout('Files are not populating in time.')
            if len(datastore.query_view('catalog/file_by_owner')) >= 1:
                done = True
            now = time.time()


    def create_dataset(self, parameter_dict_id=''):
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        dataset_id = self.dataset_management.create_dataset('test_dataset', parameter_dictionary_id=parameter_dict_id, spatial_domain=sdom, temporal_domain=tdom)
        return dataset_id

    @unittest.skip('Doesnt work')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()
        
        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))
        
        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()
        
        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))
    
        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()


    @attr('SMOKE') 
    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        
        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)


        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)




        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream 
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,4)
        
        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        
        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id =  self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

    
        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()
        
        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)


        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b,bool) else b)



    def test_retrieve_and_transform(self):

        # Stream definition for the CTD data
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
        ctd_stream_id, route = self.pubsub_management.create_stream('ctd stream', 'xp1', stream_definition_id=stream_def_id)


        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        #--------------------------------------------------------------------------------
        # Again with this ridiculous problem
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)
        self.ingestion_management.persist_data_stream(stream_id=ctd_stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10,20)

        publisher.publish(rdt.to_granule())


        self.wait_until_we_have_enough_granules(dataset_id, 2)

        granule = self.data_retriever.retrieve(dataset_id, 
                                             None,
                                             None, 
                                             'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                             'CTDL2SalinityTransformAlgorithm', 
                                             kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i,0)



    def test_last_granule(self):
        #--------------------------------------------------------------------------------
        # Create the necessary configurations for the test
        #--------------------------------------------------------------------------------
        pdict_id          = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id     = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        stream_id, route  = self.pubsub_management.create_stream('last_granule', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id         = self.get_ingestion_config()
        dataset_id        = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        #--------------------------------------------------------------------------------
        # Create the datastore first,
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.publish_hifi(stream_id,route, 0)
        self.publish_hifi(stream_id,route, 1)
        

        self.wait_until_we_have_enough_granules(dataset_id,2) # I just need two


        success = False
        def verifier():
            replay_granule = self.data_retriever.retrieve_last_granule(dataset_id)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verifier)

        self.assertTrue(success)

        success = False
        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verify_points)

        self.assertTrue(success)



    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        
        stream_id, route  = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id  = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)


        #--------------------------------------------------------------------------------
        # Coerce the datastore into existence (beats race condition)
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)

        self.wait_until_we_have_enough_granules(dataset_id,4)

        query = {
            'start_time': 0,
            'end_time':   20,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0,20,2) == rdt['time']
        self.assertTrue(comp.all(),'%s' % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)



    def test_repersist_data(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(name='ctd', parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(name='repersist', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        self.get_datastore(dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,2)
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,4)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
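
test_last_granule above retries its checks through a poll(...) helper that is not shown in this excerpt. A minimal sketch of such a helper, assuming its intended behavior is simply to call a function repeatedly until it returns a truthy value or a timeout expires (the project's actual helper may differ):

import gevent

def poll(callback, timeout=10, interval=0.5):
    # Retry callback until it returns something truthy or timeout seconds elapse.
    with gevent.timeout.Timeout(timeout, False):  # False suppresses the timeout exception
        while True:
            result = callback()
            if result:
                return result
            gevent.sleep(interval)
    return False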
Code example #17
    def _setup_resources(self):
        # TODO: some or all of this (or some variation) should move to DAMS'

        # Build the test resources for the dataset
        dms_cli = DatasetManagementServiceClient()
        dams_cli = DataAcquisitionManagementServiceClient()
        dpms_cli = DataProductManagementServiceClient()
        rr_cli = ResourceRegistryServiceClient()
        pubsub_cli = PubsubManagementServiceClient()

        eda = ExternalDatasetAgent(name='example eda',handler_module=self.DVR_CONFIG['dvr_mod'],
            handler_class=self.DVR_CONFIG['dvr_cls'])
        eda_id = dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance(name='example eda instance')
        eda_inst_id = dams_cli.create_external_dataset_agent_instance(eda_inst,
            external_dataset_agent_id=eda_id)

        # Create and register the necessary resources/objects

        # Create DataProvider
        dprov = ExternalDataProvider(name='example data provider', institution=Institution(),
            contact=ContactInformation())
        dprov.contact.individual_names_given = 'Christopher Mueller'
        dprov.contact.email = '*****@*****.**'

        # Create DataSource
        dsrc = DataSource(name='example datasource', protocol_type='DAP', institution=Institution(),
            contact=ContactInformation())
        dsrc.connection_params['base_data_url'] = ''
        dsrc.contact.individual_names_given = 'Tim Giguere'
        dsrc.contact.email = '*****@*****.**'

        # Create ExternalDataset
        ds_name = 'usgs_test_dataset'
        dset = ExternalDataset(name=ds_name,
            dataset_description=DatasetDescription(),
            update_description=UpdateDescription(),
            contact=ContactInformation())

        # The usgs.nc test dataset is a download of the R1 dataset found here:
        # http://thredds-test.oceanobservatories.org/thredds/dodsC/ooiciData/E66B1A74-A684-454A-9ADE-8388C2C634E5.ncml
        dset.dataset_description.parameters['dataset_path'] = 'test_data/usgs.nc'
        dset.dataset_description.parameters['temporal_dimension'] = 'time'
        dset.dataset_description.parameters['zonal_dimension'] = 'lon'
        dset.dataset_description.parameters['meridional_dimension'] = 'lat'
        dset.dataset_description.parameters['vertical_dimension'] = 'z'
        dset.dataset_description.parameters['variables'] = [
            'water_temperature',
            'streamflow',
            'water_temperature_bottom',
            'water_temperature_middle',
            'specific_conductance',
            'data_qualifier', ]

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name='dap_model')
        #dsrc_model.model = 'DAP'
        dsrc_model.data_handler_module = 'N/A'
        dsrc_model.data_handler_class = 'N/A'

        ## Run everything through DAMS
        ds_id = dams_cli.create_external_dataset(external_dataset=dset)
        ext_dprov_id = dams_cli.create_external_data_provider(external_data_provider=dprov)
        ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = dams_cli.register_external_data_set(external_dataset_id=ds_id)

        # Or using each method
        dams_cli.assign_data_source_to_external_data_provider(data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        dams_cli.assign_data_source_to_data_model(data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        dams_cli.assign_external_dataset_to_data_source(external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        dams_cli.assign_external_dataset_to_agent_instance(external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
        #        dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=self.eda_id, agent_instance_id=self.eda_inst_id)

        #create temp streamdef so the data product can create the stream
        pc_list = []

        #Get 'time' parameter context
        pc_list.append(dms_cli.read_parameter_context_by_name('time', id_only=True))

        for pc_k, pc in self._create_parameter_dictionary().iteritems():
            pc_list.append(dms_cli.create_parameter_context(pc_k, pc[1].dump()))

        pdict_id = dms_cli.create_parameter_dictionary('netcdf_param_dict', pc_list)

        #create temp streamdef so the data product can create the stream
        streamdef_id = pubsub_cli.create_stream_definition(name="netcdf", description="netcdf", parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()
        tdom, sdom = tdom.dump(), sdom.dump()

        dprod = IonObject(RT.DataProduct,
            name='usgs_parsed_product',
            description='parsed usgs product',
            temporal_domain=tdom,
            spatial_domain=sdom)

        # Generate the data product and associate it to the ExternalDataset
        dproduct_id = dpms_cli.create_data_product(data_product=dprod,
            stream_definition_id=streamdef_id)

        dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id)

        stream_id, assn = rr_cli.find_objects(subject=dproduct_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        stream_id = stream_id[0]

        log.info('Created resources: {0}'.format({'ExternalDataset': ds_id, 'ExternalDataProvider': ext_dprov_id, 'DataSource': ext_dsrc_id, 'DataSourceModel': ext_dsrc_model_id, 'DataProducer': dproducer_id, 'DataProduct': dproduct_id, 'Stream': stream_id}))

        # Create the logger for receiving publications
        _, stream_route, _ = self.create_stream_and_logger(name='usgs', stream_id=stream_id)

        self.EDA_RESOURCE_ID = ds_id
        self.EDA_NAME = ds_name
        self.DVR_CONFIG['dh_cfg'] = {
            'TESTING': True,
            'stream_id': stream_id,
            'stream_route': stream_route,
            'stream_def': streamdef_id,
            'data_producer_id': dproducer_id,  # CBM: Should this be put in the main body of the config - with mod & cls?
            'max_records': 1,
            }
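
The handler configured above points at the local test_data/usgs.nc file described in the comments. A minimal sketch of inspecting that file directly, assuming the netCDF4 package is available (purely illustrative; this is not the handler module used by the agent):

from netCDF4 import Dataset

ds = Dataset('test_data/usgs.nc')                   # path taken from dataset_description above
times = ds.variables['time'][:]                     # temporal dimension named in the config
water_temp = ds.variables['water_temperature'][:]   # one of the variables listed in the config
print(water_temp.shape)
ds.close()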
Code example #18
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):  # Love the non pep-8 convention
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.pids = []
        self.event = Event()
        self.exchange_space_name = 'test_granules'
        self.exchange_point_name = 'science_data'
        self.i = 0

        self.purge_queues()
        self.queue_buffer = []
        self.streams = []
        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        xn = self.container.ex_manager.create_xn_queue(
            'science_granule_ingestion')
        xn.purge()

    def tearDown(self):
        self.purge_queues()
        for pid in self.pids:
            self.container.proc_manager.terminate_process(pid)
        IngestionManagementIntTest.clean_subscriptions()
        for queue in self.queue_buffer:
            if isinstance(queue, ExchangeNameQueue):
                queue.delete()
            elif isinstance(queue, str):
                xn = self.container.ex_manager.create_xn_queue(queue)
                xn.delete()

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------

    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset
        '''
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
                'ctd_parsed_param_dict', id_only=True)

        dataset_id = self.dataset_management.create_dataset(
            'test_dataset_%i' % self.i,
            parameter_dictionary_id=parameter_dict_id,
            spatial_domain=sdom,
            temporal_domain=tdom)
        return dataset_id

    def get_datastore(self, dataset_id):
        '''
        Gets an instance of the datastore
            This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes 
            delete a CouchDB datastore and the other containers are unaware of the new state of the datastore.
        '''
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    def get_ingestion_config(self):
        '''
        Grab the ingestion configuration from the resource registry
        '''
        # The ingestion configuration should have been created by the bootstrap service
        # which is configured through r2deploy.yml

        ingest_configs, _ = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration, id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=''):
        '''
        Launch the producer
        '''

        pid = self.container.spawn_process(
            'better_data_producer', 'ion.processes.data.example_data_producer',
            'BetterDataProducer', {'process': {
                'stream_id': stream_id
            }})

        self.pids.append(pid)

    def make_simple_dataset(self):
        '''
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests
        '''
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(
            'ctd stream %i' % self.i,
            'xp1',
            stream_definition_id=stream_def_id)

        dataset_id = self.create_dataset(pdict_id)

        self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self, stream_id, stream_route, offset=0):
        '''
        Publish deterministic data
        '''
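        # Each call publishes one granule of ten consecutive time/temp values starting at offset * 10.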

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(
            stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        '''
        Make four granules
        '''
        for i in xrange(4):
            self.publish_hifi(stream_id, route, i)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Starts ingestion/persistence for a given dataset
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id,
            dataset_id=dataset_id)

    def stop_ingestion(self, stream_id):
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id)

    def stop_all_ingestion(self):
        try:
            [self.stop_ingestion(sid) for sid in self.streams]
        except:
            pass

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format
        '''
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.assertIsInstance(
            msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(
                    dataset_id, 'time')[0]
                granule = self.data_retriever.retrieve_last_data_points(
                    dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
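                # Done once the newest point holds real data (not the fill value) and the time axis has at least data_size records.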
                if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context(
                        'time').fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)

    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    @attr('SMOKE')
    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(
            pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_definition = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream(
            'producer',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id,
            dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to the data retriever's retrieve operation
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),
                        '%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10,
                                                        dtype='object')).all())

        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream(
            'replay_out',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(
            self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5),
                        'The process never launched')
        replay_client.start_replay()

        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)

        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------
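        # The 'tdoa' query slices by record index along the time domain; the first five records should come back.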

        granule = self.data_retriever.retrieve(dataset_id=dataset_id,
                                               query={'tdoa': slice(0, 5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b, bool) else b)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @unittest.skip("Doesn't work")
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(
            pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream(
            'replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(
            dataset_id,
            bb.coverage)  # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(
            self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()

    def test_retrieve_and_transform(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            'sal data', parameter_dictionary_id=salinity_pdict_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt['time'] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)
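        # Retrieve the stored data and apply the L2 salinity transform on the fly; the returned
        # granule should carry the derived 'salinity' values.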

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'CTDL2SalinityTransformAlgorithm',
            kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            self.assertNotEquals(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(stream_id, dataset_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id,
                                                20)  # I just need two granules of ten points each

        success = False

        def verifier():
            replay_granule = self.data_retriever.retrieve_last_data_points(
                dataset_id, 10)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False
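        # poll() retries the verifier until it returns True (or gives up), since retrieval may briefly lag ingestion.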

        success = poll(verifier)

        self.assertTrue(success)

        success = False

        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(
                dataset_id, 5)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt['time'] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verify_points)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(
            pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream(
            'replay_with_params',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id)
        es.start()

        self.addCleanup(es.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_modified.wait(30))

        query = {
            'start_time': 0 - 2208988800,
            'end_time': 20 - 2208988800,
            'stride_time': 2,
            'parameters': ['time', 'temp']
        }
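        # The bounds appear to be Unix seconds; subtracting 2208988800 maps them onto the NTP-based
        # time axis (raw values 0-19) published above, and stride_time=2 keeps every other record.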
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,
                                                      query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0, 20, 2) == rdt['time']
        self.assertTrue(comp.all(), '%s' % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(['time', 'temp']))

        extents = self.dataset_management.dataset_extents(
            dataset_id=dataset_id, parameters=['time', 'temp'])
        self.assertTrue(extents['time'] >= 20)
        self.assertTrue(extents['temp'] >= 20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)
        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0, 40)
                if not isinstance(comp, bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv(
        'CEI_LAUNCH_TEST', False
    ), 'Host requires file-system access to coverage files, CEI mode does not support.'
                     )
    def test_correct_time(self):

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        #  the conversion factor between unix and NTP time
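        #  e.g. Unix time 0 (1970-01-01T00:00:00Z) corresponds to NTP time 2208988800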
        unix_now = np.floor(time.time())
        ntp_now = unix_now + 2208988800

        unix_ago = unix_now - 20
        ntp_ago = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        coverage = DatasetManagementService._get_coverage(dataset_id)
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago, ntp_now))

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(
            dataset_id)

        self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv(
        'CEI_LAUNCH_TEST', False
    ), 'Host requires file-system access to coverage files, CEI mode does not support.'
                     )
    def test_empty_coverage_time(self):

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(
            dataset_id)
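        # With no timesteps inserted, both temporal bounds fall back to the time parameter's fill value.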
        self.assertEquals([coverage.get_parameter_context('time').fill_value] *
                          2, temporal_bounds)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv(
        'CEI_LAUNCH_TEST', False
    ), 'Host requires file-system access to coverage files, CEI mode does not support.'
                     )
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(stream_id, dataset_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
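        # retrieve_oob reads the coverage from the local file system (out of band), without going through the retriever service.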
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv(
        'CEI_LAUNCH_TEST', False
    ), 'Host requires file-system access to coverage files, CEI mode does not support.'
                     )
    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(
            dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)
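        # Touching all ten datasets should evict the first entry, assuming the cache is bounded below ten items.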

        self.assertTrue(
            dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())

    @unittest.skip('Outdated due to ingestion retry')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv(
        'CEI_LAUNCH_TEST', False
    ), 'Host requires file-system access to coverage files, CEI mode does not support.'
                     )
    def test_ingestion_failover(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(stream_id, dataset_id)

        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent",
                              callback=cb,
                              origin="stream_exception")
        sub.start()

        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        file_path = DatasetManagementService._get_coverage_path(dataset_id)
        master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id)
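        # Corrupt the coverage's HDF5 master file so the next ingestion write fails and the ExceptionEvent fires.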

        with open(master_file, 'w') as f:
            f.write('this will crash HDF')

        self.publish_hifi(stream_id, route, 5)

        self.assertTrue(event.wait(10))

        sub.stop()
Code Example #19
File: test_pubsub.py Project: mbarry02/coi-services
class PubsubManagementIntTest(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.pubsub_management       = PubsubManagementServiceClient()
        self.resource_registry       = ResourceRegistryServiceClient()
        self.dataset_management      = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

        self.pdicts = {}
        self.queue_cleanup = list()
        self.exchange_cleanup = list()
        self.context_ids = set()

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

        self.cleanup_contexts()
    
    def test_stream_def_crud(self):

        # Test Creation
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        stream_definition_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict.identifier)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_definition_id)

        # Make sure there is an assoc
        self.assertTrue(self.resource_registry.find_associations(subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True))

        # Test Reading
        stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id)
        self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary))


        # Test comparisons
        in_stream_definition_id = self.pubsub_management.create_stream_definition('L0 products', parameter_dictionary_id=pdict.identifier, available_fields=['time','temp','conductivity','pressure'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id)

        out_stream_definition_id = in_stream_definition_id
        self.assertTrue(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id))
        self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id))

        out_stream_definition_id = self.pubsub_management.create_stream_definition('L2 Products', parameter_dictionary_id=pdict.identifier, available_fields=['time','salinity','density'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id)
        self.assertFalse(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id))

        self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id))

    @unittest.skip('Needs to be refactored for cleanup')
    def test_validate_stream_defs(self):
        self.addCleanup(self.cleanup_contexts)
        #test no input 
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = []
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_0', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_0', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
    
        #test input with no output
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_1', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_1', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test available field missing parameter context definition -- missing PRESWAT_L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_2', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_2', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test l1 from l0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_3', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_3', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test l2 from l0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_4', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_4', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test Ln from L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_5', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_5', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test L2 from L1
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_6', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_6', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test L1 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_7', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_7', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
        
        #test L2 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_8', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_8', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
        
        #test L2 from L0 missing L1
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_9', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_9', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
    
    def publish_on_stream(self, stream_id, msg):
        stream = self.pubsub_management.read_stream(stream_id)
        stream_route = stream.stream_route
        publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
        publisher.publish(msg)

    def test_stream_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream')
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_exchange')
        self.addCleanup(self.pubsub_management.delete_topic, topic_id)
        self.exchange_cleanup.append('test_exchange')
        topic2_id = self.pubsub_management.create_topic(name='another_topic', exchange_point='outside')
        self.addCleanup(self.pubsub_management.delete_topic, topic2_id)
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', topic_ids=[topic_id, topic2_id], exchange_point='test_exchange', stream_definition_id=stream_def_id)

        topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertEquals(topics,[topic_id])

        defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        self.assertTrue(len(defs))

        stream = self.pubsub_management.read_stream(stream_id)
        self.assertEquals(stream.name,'test_stream')
        self.pubsub_management.delete_stream(stream_id)
        
        with self.assertRaises(NotFound):
            self.pubsub_management.read_stream(stream_id)

        defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        self.assertFalse(len(defs))

        topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertFalse(len(topics))



    def test_data_product_subscription(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()
        dp = DataProduct(name='ctd parsed')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription_id)

        validated = Event()
        def validation(msg, route, stream_id):
            validated.set()

        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        dp_stream_id = stream_ids.pop()

        validator = StandaloneStreamSubscriber('validator', callback=validation)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_management.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id)

        route = self.pubsub_management.read_stream_route(dp_stream_id)

        publisher = StandaloneStreamPublisher(dp_stream_id, route)
        publisher.publish('hi')
        self.assertTrue(validated.wait(10))
            

    def test_subscription_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream')
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_exchange', stream_definition_id=stream_def_id)
        subscription_id = self.pubsub_management.create_subscription(name='test subscription', stream_ids=[stream_id], exchange_name='test_queue')
        self.exchange_cleanup.append('test_exchange')

        subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True)
        self.assertEquals(subs,[stream_id])

        res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='test_queue', id_only=True)
        self.assertEquals(len(res),1)

        subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(subs[0], res[0])

        subscription = self.pubsub_management.read_subscription(subscription_id)
        self.assertEquals(subscription.exchange_name, 'test_queue')

        self.pubsub_management.delete_subscription(subscription_id)
        
        subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True)
        self.assertFalse(len(subs))

        subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertFalse(len(subs))


        self.pubsub_management.delete_stream(stream_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_move_before_activate(self):
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp')

        #--------------------------------------------------------------------------------
        # Test moving before activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id])

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)

        self.assertEquals(len(subjects),1)
        self.assertEquals(subjects[0], xn_ids[0])

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_move_activated_subscription(self):

        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp')
        #--------------------------------------------------------------------------------
        # Test moving after activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m,r,s):
            self.assertEquals(m,'verified')
            self.verified.set()

        subscriber = StandaloneStreamSubscriber('second_queue', verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)

        self.assertEquals(len(subjects),1)
        self.assertEquals(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('verified')

        self.assertTrue(self.verified.wait(2))

        self.pubsub_management.deactivate_subscription(subscription_id)

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_queue_cleanup(self):
        stream_id, route = self.pubsub_management.create_stream('test_stream','xp1')
        xn_objs, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        for xn_obj in xn_objs:
            xn = self.container.ex_manager.create_xn_queue(xn_obj.name)
            xn.delete()
        subscription_id = self.pubsub_management.create_subscription('queue1',stream_ids=[stream_id])
        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids),1)

        self.pubsub_management.delete_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids),0)

    def test_activation_and_deactivation(self):
        stream_id, route = self.pubsub_management.create_stream('stream1','xp1')
        subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
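        # A subscription delivers nothing until it is activated, so the first publish below should not reach the subscriber.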

        self.check1 = Event()

        def verifier(m,r,s):
            self.check1.set()


        subscriber = StandaloneStreamSubscriber('sub1',verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('should not receive')

        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        self.pubsub_management.deactivate_subscription(subscription_id)

        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

        

    def test_topic_crud(self):

        topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_xp')
        self.exchange_cleanup.append('test_xp')

        topic = self.pubsub_management.read_topic(topic_id)

        self.assertEquals(topic.name,'test_topic')
        self.assertEquals(topic.exchange_point, 'test_xp')

        self.pubsub_management.delete_topic(topic_id)
        with self.assertRaises(NotFound):
            self.pubsub_management.read_topic(topic_id)

    def test_full_pubsub(self):

        self.sub1_sat = Event()
        self.sub2_sat = Event()

        def subscriber1(m,r,s):
            self.sub1_sat.set()

        def subscriber2(m,r,s):
            self.sub2_sat.set()

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        sub1.start()
        self.addCleanup(sub1.stop)

        sub2 = StandaloneStreamSubscriber('sub2', subscriber2)
        sub2.start()
        self.addCleanup(sub2.stop)

        log_topic = self.pubsub_management.create_topic('instrument_logs', exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_topic, log_topic)
        science_topic = self.pubsub_management.create_topic('science_data', exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_topic, science_topic)
        events_topic = self.pubsub_management.create_topic('notifications', exchange_point='events')
        self.addCleanup(self.pubsub_management.delete_topic, events_topic)


        log_stream, route = self.pubsub_management.create_stream('instrument1-logs', topic_ids=[log_topic], exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_stream, log_stream)
        ctd_stream, route = self.pubsub_management.create_stream('instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_stream, ctd_stream)
        event_stream, route = self.pubsub_management.create_stream('notifications', topic_ids=[events_topic], exchange_point='events')
        self.addCleanup(self.pubsub_management.delete_stream, event_stream)
        raw_stream, route = self.pubsub_management.create_stream('temp', exchange_point='global.data')
        self.addCleanup(self.pubsub_management.delete_stream, raw_stream)


        subscription1 = self.pubsub_management.create_subscription('subscription1', stream_ids=[log_stream,event_stream], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription1)
        subscription2 = self.pubsub_management.create_subscription('subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription2)

        self.pubsub_management.activate_subscription(subscription1)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription1)
        self.pubsub_management.activate_subscription(subscription2)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription2)

        self.publish_on_stream(log_stream, 1)
        self.assertTrue(self.sub1_sat.wait(4))
        self.assertFalse(self.sub2_sat.is_set())
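        # raw_stream publishes on the 'global.data' exchange point, which only subscription2 listens to.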

        self.publish_on_stream(raw_stream, 1)
        self.assertTrue(self.sub2_sat.wait(4))
    
    def test_topic_craziness(self):

        self.msg_queue = Queue()

        def subscriber1(m,r,s):
            self.msg_queue.put(m)

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        sub1.start()
        self.addCleanup(sub1.stop)

        topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1')
        self.addCleanup(self.pubsub_management.delete_topic, topic1)
        topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1)
        self.addCleanup(self.pubsub_management.delete_topic, topic2)
        topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1)
        self.addCleanup(self.pubsub_management.delete_topic, topic3)
        topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2)
        self.addCleanup(self.pubsub_management.delete_topic, topic4)
        topic5 = self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2)
        self.addCleanup(self.pubsub_management.delete_topic, topic5)
        topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3)
        self.addCleanup(self.pubsub_management.delete_topic, topic6)
        topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3)
        self.addCleanup(self.pubsub_management.delete_topic, topic7)

        # Tree 2
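        #   topic8 -> topic9 -> (topic10), (topic11 -> topic12, topic13)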
        topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_topic, topic8)
        topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8)
        self.addCleanup(self.pubsub_management.delete_topic, topic9)
        topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9)
        self.addCleanup(self.pubsub_management.delete_topic, topic10)
        topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9)
        self.addCleanup(self.pubsub_management.delete_topic, topic11)
        topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11)
        self.addCleanup(self.pubsub_management.delete_topic, topic12)
        topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11)
        self.addCleanup(self.pubsub_management.delete_topic, topic13)
        self.exchange_cleanup.extend(['xp1','xp2'])
        
        stream1_id, route = self.pubsub_management.create_stream('stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1')
        self.addCleanup(self.pubsub_management.delete_stream, stream1_id)
        stream2_id, route = self.pubsub_management.create_stream('stream2', topic_ids=[topic8], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream2_id)
        stream3_id, route = self.pubsub_management.create_stream('stream3', topic_ids=[topic10,topic13], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream3_id)
        stream4_id, route = self.pubsub_management.create_stream('stream4', topic_ids=[topic9], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream4_id)
        stream5_id, route = self.pubsub_management.create_stream('stream5', topic_ids=[topic11], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream5_id)

        subscription1 = self.pubsub_management.create_subscription('sub1', topic_ids=[topic1])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription1)
        subscription2 = self.pubsub_management.create_subscription('sub2', topic_ids=[topic8], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription2)
        subscription3 = self.pubsub_management.create_subscription('sub3', topic_ids=[topic9], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription3)
        subscription4 = self.pubsub_management.create_subscription('sub4', topic_ids=[topic10,topic13, topic11], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription4)
        #--------------------------------------------------------------------------------
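        # subscription1 is rooted at topic1; stream1 is attached to topic4, topic5 and topic7,
        # all descendants of topic1, so exactly one copy of the message should arrive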
        self.pubsub_management.activate_subscription(subscription1)

        self.publish_on_stream(stream1_id,1)

        self.assertEquals(self.msg_queue.get(timeout=10), 1)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)


        self.pubsub_management.deactivate_subscription(subscription1)
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription2)
        
        self.publish_on_stream(stream2_id,2)
        self.assertEquals(self.msg_queue.get(timeout=10), 2)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription2)

        #--------------------------------------------------------------------------------
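        # subscription3 is rooted at topic9: stream2 (topic8, the parent) must not be delivered,
        # while stream3 (topic10/topic13, descendants of topic9) must be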
        self.pubsub_management.activate_subscription(subscription3)

        self.publish_on_stream(stream2_id, 3)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream3_id, 4)
        self.assertEquals(self.msg_queue.get(timeout=10),4)


        self.pubsub_management.deactivate_subscription(subscription3)

        #--------------------------------------------------------------------------------
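        # subscription4 names topic10, topic13 and topic11: stream4 (topic9, an ancestor) must not
        # be delivered, and stream5 (topic11) must arrive exactly once despite the overlapping topics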
        self.pubsub_management.activate_subscription(subscription4)

        self.publish_on_stream(stream4_id, 5)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream5_id, 6)
        self.assertEquals(self.msg_queue.get(timeout=10),6)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.pubsub_management.deactivate_subscription(subscription4)
        
        #--------------------------------------------------------------------------------
    
    def cleanup_contexts(self):
        for context_id in self.context_ids:
            self.dataset_management.delete_parameter_context(context_id)

    def add_context_to_cleanup(self, context_id):
        self.context_ids.add(context_id)

    def _get_pdict(self, filter_values):
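        """
        Builds a parameter dictionary restricted to the contexts named in filter_values,
        covering the L0 inputs (time, lat, lon, TEMPWAT/CONDWAT/PRESWAT_L0), the derived L1
        products and the L2 PRACSAL/DENSITY products. Results are cached in self.pdicts.
        """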
        t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(name='TIME', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', units=t_ctxt.uom)
        self.add_context_to_cleanup(t_ctxt_id)

        lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(name='LAT', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', units=lat_ctxt.uom)
        self.add_context_to_cleanup(lat_ctxt_id)


        lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(name='LON', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', units=lon_ctxt.uom)
        self.add_context_to_cleanup(lon_ctxt_id)


        # Independent Parameters
        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', units=temp_ctxt.uom)
        self.add_context_to_cleanup(temp_ctxt_id)


        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', units=cond_ctxt.uom)
        self.add_context_to_cleanup(cond_ctxt_id)


        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', units=press_ctxt.uom)
        self.add_context_to_cleanup(press_ctxt_id)


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
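        #   e.g. a raw TEMPWAT_L0 count of 280000 yields (280000 / 10000) - 10 = 18.0 deg_C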
        tl1_func = '(T / 10000) - 10'
        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'], param_map=tl1_pmap)
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', units=tempL1_ctxt.uom)
        self.add_context_to_cleanup(tempL1_ctxt_id)


        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
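        #   e.g. a raw CONDWAT_L0 count of 400000 yields (400000 / 100000) - 0.5 = 3.5 S m-1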
        cl1_func = '(C / 100000) - 0.5'
        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'], param_map=cl1_pmap)
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', units=condL1_ctxt.uom)
        self.add_context_to_cleanup(condL1_ctxt_id)


        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
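        #   p_range would normally come from instrument calibration; here it is pinned via the param_map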
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'], param_map=pl1_pmap)
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', units=presL1_ctxt.uom)
        self.add_context_to_cleanup(presL1_ctxt_id)


        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
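        #   gsw.SP_from_C expects conductivity in mS cm-1, hence CONDWAT_L1 (S m-1) is scaled by 10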
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        sal_pmap = {'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_kwargmap = None
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap, sal_pmap)
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', units=sal_ctxt.uom)
        self.add_context_to_cleanup(sal_ctxt_id)


        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
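        #   absolute salinity and conservative temperature are intermediate PythonFunctions nested
        #   inside the DENSITY expression; they are not registered as parameter contexts themselves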
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', units=dens_ctxt.uom)
        self.add_context_to_cleanup(dens_ctxt_id)

        
        ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id, sal_ctxt_id, dens_ctxt_id]
        contexts = [t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt, tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt]
        context_ids = [ids[i] for i,ctxt in enumerate(contexts) if ctxt.name in filter_values]
        pdict_name = '_'.join([ctxt.name for ctxt in contexts if ctxt.name in filter_values])

        try:
            return self.pdicts[pdict_name]
        except KeyError:
            pdict_id = self.dataset_management.create_parameter_dictionary(pdict_name, parameter_context_ids=context_ids, temporal_context='time')
            self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
            self.pdicts[pdict_name] = pdict_id
            return pdict_id
コード例 #20
class PubsubManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

        self.queue_cleanup = list()
        self.exchange_cleanup = list()

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    def test_stream_def_crud(self):

        # Test Creation
        pdict = DatasetManagementService.get_parameter_dictionary_by_name("ctd_parsed_param_dict")
        stream_definition_id = self.pubsub_management.create_stream_definition(
            "ctd parsed", parameter_dictionary_id=pdict.identifier
        )

        # Make sure there is an assoc
        self.assertTrue(
            self.resource_registry.find_associations(
                subject=stream_definition_id,
                predicate=PRED.hasParameterDictionary,
                object=pdict.identifier,
                id_only=True,
            )
        )

        # Test Reading
        stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id)
        self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary))

        # Test Deleting
        self.pubsub_management.delete_stream_definition(stream_definition_id)
        self.assertFalse(
            self.resource_registry.find_associations(
                subject=stream_definition_id,
                predicate=PRED.hasParameterDictionary,
                object=pdict.identifier,
                id_only=True,
            )
        )

        # Test comparisons
        in_stream_definition_id = self.pubsub_management.create_stream_definition(
            "L0 products",
            parameter_dictionary=pdict.identifier,
            available_fields=["time", "temp", "conductivity", "pressure"],
        )
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id)

        out_stream_definition_id = in_stream_definition_id
        self.assertTrue(
            self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id)
        )
        self.assertTrue(
            self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id)
        )

        out_stream_definition_id = self.pubsub_management.create_stream_definition(
            "L2 Products", parameter_dictionary=pdict.identifier, available_fields=["time", "salinity", "density"]
        )
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id)
        self.assertFalse(
            self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id)
        )

        self.assertTrue(
            self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id)
        )

    def test_validate_stream_defs(self):
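        # Each case builds an incoming and an outgoing stream definition and checks whether
        # validate_stream_defs accepts the pair; the cases cover empty field lists, missing
        # parameter contexts, and derivation of L1/L2 products from L0 (or L1) inputs.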

        # test no input
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"])
        outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        available_fields_in = []
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_0", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_0", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        # test input with no output
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"])
        outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_1", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_1", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        # test available field missing parameter context definition -- missing PRESWAT_L0
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0"])
        outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]
        available_fields_out = ["DENSITY"]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_2", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_2", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        # test l1 from l0
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"])
        outgoing_pdict_id = self._get_pdict(["TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]
        available_fields_out = ["TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_3", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_3", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        # test l2 from l0
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"])
        outgoing_pdict_id = self._get_pdict(["TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1", "DENSITY", "PRACSAL"])
        available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]
        available_fields_out = ["DENSITY", "PRACSAL"]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_4", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_4", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        # test Ln from L0
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"])
        outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]
        available_fields_out = ["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_5", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_5", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        # test L2 from L1
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        available_fields_in = ["time", "lat", "lon", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"]
        available_fields_out = ["DENSITY", "PRACSAL"]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_6", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_6", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        # test L1 from L0 missing L0
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon"])
        outgoing_pdict_id = self._get_pdict(["TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        available_fields_in = ["time", "lat", "lon"]
        available_fields_out = ["DENSITY", "PRACSAL"]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_7", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_7", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        # test L2 from L0 missing L0
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon"])
        outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL", "TEMPWAT_L1", "CONDWAT_L1", "PRESWAT_L1"])
        available_fields_in = ["time", "lat", "lon"]
        available_fields_out = ["DENSITY", "PRACSAL"]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_8", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_8", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        # test L2 from L0 missing L1
        incoming_pdict_id = self._get_pdict(["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"])
        outgoing_pdict_id = self._get_pdict(["DENSITY", "PRACSAL"])
        available_fields_in = ["time", "lat", "lon", "TEMPWAT_L0", "CONDWAT_L0", "PRESWAT_L0"]
        available_fields_out = ["DENSITY", "PRACSAL"]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            "in_sd_9", parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in
        )
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            "out_sd_9", parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out
        )
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

    def publish_on_stream(self, stream_id, msg):
        stream = self.pubsub_management.read_stream(stream_id)
        stream_route = stream.stream_route
        publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
        publisher.publish(msg)

    def test_stream_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition("test_definition", stream_type="stream")
        topic_id = self.pubsub_management.create_topic(name="test_topic", exchange_point="test_exchange")
        self.exchange_cleanup.append("test_exchange")
        topic2_id = self.pubsub_management.create_topic(name="another_topic", exchange_point="outside")
        stream_id, route = self.pubsub_management.create_stream(
            name="test_stream",
            topic_ids=[topic_id, topic2_id],
            exchange_point="test_exchange",
            stream_definition_id=stream_def_id,
        )

        topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertEquals(topics, [topic_id])

        defs, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True
        )
        self.assertTrue(len(defs))

        stream = self.pubsub_management.read_stream(stream_id)
        self.assertEquals(stream.name, "test_stream")
        self.pubsub_management.delete_stream(stream_id)

        with self.assertRaises(NotFound):
            self.pubsub_management.read_stream(stream_id)

        defs, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True
        )
        self.assertFalse(len(defs))

        topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertFalse(len(topics))

        self.pubsub_management.delete_topic(topic_id)
        self.pubsub_management.delete_topic(topic2_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_subscription_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition("test_definition", stream_type="stream")
        stream_id, route = self.pubsub_management.create_stream(
            name="test_stream", exchange_point="test_exchange", stream_definition_id=stream_def_id
        )
        subscription_id = self.pubsub_management.create_subscription(
            name="test subscription", stream_ids=[stream_id], exchange_name="test_queue"
        )
        self.exchange_cleanup.append("test_exchange")

        subs, assocs = self.resource_registry.find_objects(
            subject=subscription_id, predicate=PRED.hasStream, id_only=True
        )
        self.assertEquals(subs, [stream_id])

        res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="test_queue", id_only=True)
        self.assertEquals(len(res), 1)

        subs, assocs = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True
        )
        self.assertEquals(subs[0], res[0])

        subscription = self.pubsub_management.read_subscription(subscription_id)
        self.assertEquals(subscription.exchange_name, "test_queue")

        self.pubsub_management.delete_subscription(subscription_id)

        subs, assocs = self.resource_registry.find_objects(
            subject=subscription_id, predicate=PRED.hasStream, id_only=True
        )
        self.assertFalse(len(subs))

        subs, assocs = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True
        )
        self.assertFalse(len(subs))

        self.pubsub_management.delete_stream(stream_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_move_before_activate(self):
        stream_id, route = self.pubsub_management.create_stream(name="test_stream", exchange_point="test_xp")

        # --------------------------------------------------------------------------------
        # Test moving before activate
        # --------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription("first_queue", stream_ids=[stream_id])

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="first_queue", id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True
        )
        self.assertEquals(xn_ids[0], subjects[0])

        self.pubsub_management.move_subscription(subscription_id, exchange_name="second_queue")

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="second_queue", id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True
        )

        self.assertEquals(len(subjects), 1)
        self.assertEquals(subjects[0], xn_ids[0])

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_move_activated_subscription(self):

        stream_id, route = self.pubsub_management.create_stream(name="test_stream", exchange_point="test_xp")
        # --------------------------------------------------------------------------------
        # Test moving after activate
        # --------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription("first_queue", stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="first_queue", id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True
        )
        self.assertEquals(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m, r, s):
            self.assertEquals(m, "verified")
            self.verified.set()

        subscriber = StandaloneStreamSubscriber("second_queue", verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id, exchange_name="second_queue")

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="second_queue", id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id, predicate=PRED.hasSubscription, id_only=True
        )

        self.assertEquals(len(subjects), 1)
        self.assertEquals(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish("verified")

        self.assertTrue(self.verified.wait(2))

        self.pubsub_management.deactivate_subscription(subscription_id)

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_queue_cleanup(self):
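        # Creating a subscription should register exactly one ExchangeName resource for the
        # queue, and deleting the subscription should remove it again.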
        stream_id, route = self.pubsub_management.create_stream("test_stream", "xp1")
        xn_objs, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="queue1")
        for xn_obj in xn_objs:
            xn = self.container.ex_manager.create_xn_queue(xn_obj.name)
            xn.delete()
        subscription_id = self.pubsub_management.create_subscription("queue1", stream_ids=[stream_id])
        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="queue1")
        self.assertEquals(len(xn_ids), 1)

        self.pubsub_management.delete_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="queue1")
        self.assertEquals(len(xn_ids), 0)

    def test_activation_and_deactivation(self):
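        # Messages published while the subscription is inactive must not be delivered;
        # activating, deactivating and re-activating toggles delivery accordingly.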
        stream_id, route = self.pubsub_management.create_stream("stream1", "xp1")
        subscription_id = self.pubsub_management.create_subscription("sub1", stream_ids=[stream_id])

        self.check1 = Event()

        def verifier(m, r, s):
            self.check1.set()

        subscriber = StandaloneStreamSubscriber("sub1", verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish("should not receive")

        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish("should receive")
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        self.pubsub_management.deactivate_subscription(subscription_id)

        publisher.publish("should not receive")
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish("should receive")
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_topic_crud(self):

        topic_id = self.pubsub_management.create_topic(name="test_topic", exchange_point="test_xp")
        self.exchange_cleanup.append("test_xp")

        topic = self.pubsub_management.read_topic(topic_id)

        self.assertEquals(topic.name, "test_topic")
        self.assertEquals(topic.exchange_point, "test_xp")

        self.pubsub_management.delete_topic(topic_id)
        with self.assertRaises(NotFound):
            self.pubsub_management.read_topic(topic_id)

    def test_full_pubsub(self):
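        # Two subscribers: subscription1 is bound to specific streams (logs and notifications),
        # subscription2 to the ctd stream plus the entire 'global.data' exchange point.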

        self.sub1_sat = Event()
        self.sub2_sat = Event()

        def subscriber1(m, r, s):
            self.sub1_sat.set()

        def subscriber2(m, r, s):
            self.sub2_sat.set()

        sub1 = StandaloneStreamSubscriber("sub1", subscriber1)
        self.queue_cleanup.append(sub1.xn.queue)
        sub1.start()

        sub2 = StandaloneStreamSubscriber("sub2", subscriber2)
        self.queue_cleanup.append(sub2.xn.queue)
        sub2.start()

        log_topic = self.pubsub_management.create_topic("instrument_logs", exchange_point="instruments")
        science_topic = self.pubsub_management.create_topic("science_data", exchange_point="instruments")
        events_topic = self.pubsub_management.create_topic("notifications", exchange_point="events")

        log_stream, route = self.pubsub_management.create_stream(
            "instrument1-logs", topic_ids=[log_topic], exchange_point="instruments"
        )
        ctd_stream, route = self.pubsub_management.create_stream(
            "instrument1-ctd", topic_ids=[science_topic], exchange_point="instruments"
        )
        event_stream, route = self.pubsub_management.create_stream(
            "notifications", topic_ids=[events_topic], exchange_point="events"
        )
        raw_stream, route = self.pubsub_management.create_stream("temp", exchange_point="global.data")
        self.exchange_cleanup.extend(["instruments", "events", "global.data"])

        subscription1 = self.pubsub_management.create_subscription(
            "subscription1", stream_ids=[log_stream, event_stream], exchange_name="sub1"
        )
        subscription2 = self.pubsub_management.create_subscription(
            "subscription2", exchange_points=["global.data"], stream_ids=[ctd_stream], exchange_name="sub2"
        )

        self.pubsub_management.activate_subscription(subscription1)
        self.pubsub_management.activate_subscription(subscription2)

        self.publish_on_stream(log_stream, 1)
        self.assertTrue(self.sub1_sat.wait(4))
        self.assertFalse(self.sub2_sat.is_set())

        self.publish_on_stream(raw_stream, 1)
        # raw_stream publishes to the 'global.data' exchange point, which subscription2 is bound to
        self.assertTrue(self.sub2_sat.wait(4))

        sub1.stop()
        sub2.stop()

    def test_topic_craziness(self):

        self.msg_queue = Queue()

        def subscriber1(m, r, s):
            self.msg_queue.put(m)

        sub1 = StandaloneStreamSubscriber("sub1", subscriber1)
        self.queue_cleanup.append(sub1.xn.queue)
        sub1.start()

        topic1 = self.pubsub_management.create_topic("topic1", exchange_point="xp1")
        topic2 = self.pubsub_management.create_topic("topic2", exchange_point="xp1", parent_topic_id=topic1)
        topic3 = self.pubsub_management.create_topic("topic3", exchange_point="xp1", parent_topic_id=topic1)
        topic4 = self.pubsub_management.create_topic("topic4", exchange_point="xp1", parent_topic_id=topic2)
        topic5 = self.pubsub_management.create_topic("topic5", exchange_point="xp1", parent_topic_id=topic2)
        topic6 = self.pubsub_management.create_topic("topic6", exchange_point="xp1", parent_topic_id=topic3)
        topic7 = self.pubsub_management.create_topic("topic7", exchange_point="xp1", parent_topic_id=topic3)

        # Tree 2
        topic8 = self.pubsub_management.create_topic("topic8", exchange_point="xp2")
        topic9 = self.pubsub_management.create_topic("topic9", exchange_point="xp2", parent_topic_id=topic8)
        topic10 = self.pubsub_management.create_topic("topic10", exchange_point="xp2", parent_topic_id=topic9)
        topic11 = self.pubsub_management.create_topic("topic11", exchange_point="xp2", parent_topic_id=topic9)
        topic12 = self.pubsub_management.create_topic("topic12", exchange_point="xp2", parent_topic_id=topic11)
        topic13 = self.pubsub_management.create_topic("topic13", exchange_point="xp2", parent_topic_id=topic11)
        self.exchange_cleanup.extend(["xp1", "xp2"])

        stream1_id, route = self.pubsub_management.create_stream(
            "stream1", topic_ids=[topic7, topic4, topic5], exchange_point="xp1"
        )
        stream2_id, route = self.pubsub_management.create_stream("stream2", topic_ids=[topic8], exchange_point="xp2")
        stream3_id, route = self.pubsub_management.create_stream(
            "stream3", topic_ids=[topic10, topic13], exchange_point="xp2"
        )
        stream4_id, route = self.pubsub_management.create_stream("stream4", topic_ids=[topic9], exchange_point="xp2")
        stream5_id, route = self.pubsub_management.create_stream("stream5", topic_ids=[topic11], exchange_point="xp2")

        subscription1 = self.pubsub_management.create_subscription("sub1", topic_ids=[topic1])
        subscription2 = self.pubsub_management.create_subscription("sub2", topic_ids=[topic8], exchange_name="sub1")
        subscription3 = self.pubsub_management.create_subscription("sub3", topic_ids=[topic9], exchange_name="sub1")
        subscription4 = self.pubsub_management.create_subscription(
            "sub4", topic_ids=[topic10, topic13, topic11], exchange_name="sub1"
        )
        # --------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription1)

        self.publish_on_stream(stream1_id, 1)

        self.assertEquals(self.msg_queue.get(timeout=10), 1)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription1)
        self.pubsub_management.delete_subscription(subscription1)
        # --------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription2)

        self.publish_on_stream(stream2_id, 2)
        self.assertEquals(self.msg_queue.get(timeout=10), 2)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription2)
        self.pubsub_management.delete_subscription(subscription2)

        # --------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription3)

        self.publish_on_stream(stream2_id, 3)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream3_id, 4)
        self.assertEquals(self.msg_queue.get(timeout=10), 4)

        self.pubsub_management.deactivate_subscription(subscription3)
        self.pubsub_management.delete_subscription(subscription3)

        # --------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription4)

        self.publish_on_stream(stream4_id, 5)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream5_id, 6)
        self.assertEquals(self.msg_queue.get(timeout=10), 6)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.pubsub_management.deactivate_subscription(subscription4)
        self.pubsub_management.delete_subscription(subscription4)

        # --------------------------------------------------------------------------------
        sub1.stop()

        self.pubsub_management.delete_topic(topic13)
        self.pubsub_management.delete_topic(topic12)
        self.pubsub_management.delete_topic(topic11)
        self.pubsub_management.delete_topic(topic10)
        self.pubsub_management.delete_topic(topic9)
        self.pubsub_management.delete_topic(topic8)
        self.pubsub_management.delete_topic(topic7)
        self.pubsub_management.delete_topic(topic6)
        self.pubsub_management.delete_topic(topic5)
        self.pubsub_management.delete_topic(topic4)
        self.pubsub_management.delete_topic(topic3)
        self.pubsub_management.delete_topic(topic2)
        self.pubsub_management.delete_topic(topic1)

        self.pubsub_management.delete_stream(stream1_id)
        self.pubsub_management.delete_stream(stream2_id)
        self.pubsub_management.delete_stream(stream3_id)
        self.pubsub_management.delete_stream(stream4_id)
        self.pubsub_management.delete_stream(stream5_id)

    def _get_pdict(self, filter_values):
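        # Same helper as in the previous example, but written against the positional
        # NumexprFunction/PythonFunction call style and the unit_of_measure keyword,
        # and without the pdict cache.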
        t_ctxt = ParameterContext("time", param_type=QuantityType(value_encoding=np.dtype("int64")))
        t_ctxt.uom = "seconds since 01-01-1900"
        t_ctxt.fill_value = -9999
        t_ctxt_id = self.dataset_management.create_parameter_context(
            name="time", parameter_context=t_ctxt.dump(), parameter_type="quantity<int64>", unit_of_measure=t_ctxt.uom
        )

        lat_ctxt = ParameterContext("lat", param_type=ConstantType(QuantityType(value_encoding=np.dtype("float32"))))
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = "degree_north"
        lat_ctxt.fill_value = -9999
        lat_ctxt_id = self.dataset_management.create_parameter_context(
            name="lat",
            parameter_context=lat_ctxt.dump(),
            parameter_type="quantity<float32>",
            unit_of_measure=lat_ctxt.uom,
        )

        lon_ctxt = ParameterContext("lon", param_type=ConstantType(QuantityType(value_encoding=np.dtype("float32"))))
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = "degree_east"
        lon_ctxt.fill_value = -9999
        lon_ctxt_id = self.dataset_management.create_parameter_context(
            name="lon",
            parameter_context=lon_ctxt.dump(),
            parameter_type="quantity<float32>",
            unit_of_measure=lon_ctxt.uom,
        )

        temp_ctxt = ParameterContext("TEMPWAT_L0", param_type=QuantityType(value_encoding=np.dtype("float32")))
        temp_ctxt.uom = "deg_C"
        temp_ctxt.fill_value = -9999
        temp_ctxt_id = self.dataset_management.create_parameter_context(
            name="TEMPWAT_L0",
            parameter_context=temp_ctxt.dump(),
            parameter_type="quantity<float32>",
            unit_of_measure=temp_ctxt.uom,
        )

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext("CONDWAT_L0", param_type=QuantityType(value_encoding=np.dtype("float32")))
        cond_ctxt.uom = "S m-1"
        cond_ctxt.fill_value = -9999
        cond_ctxt_id = self.dataset_management.create_parameter_context(
            name="CONDWAT_L0",
            parameter_context=cond_ctxt.dump(),
            parameter_type="quantity<float32>",
            unit_of_measure=cond_ctxt.uom,
        )

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext("PRESWAT_L0", param_type=QuantityType(value_encoding=np.dtype("float32")))
        press_ctxt.uom = "dbar"
        press_ctxt.fill_value = -9999
        press_ctxt_id = self.dataset_management.create_parameter_context(
            name="PRESWAT_L0",
            parameter_context=press_ctxt.dump(),
            parameter_type="quantity<float32>",
            unit_of_measure=press_ctxt.uom,
        )

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = "(TEMPWAT_L0 / 10000) - 10"
        tl1_pmap = {"TEMPWAT_L0": "TEMPWAT_L0"}
        func = NumexprFunction("TEMPWAT_L1", tl1_func, tl1_pmap)
        tempL1_ctxt = ParameterContext(
            "TEMPWAT_L1", param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL
        )
        tempL1_ctxt.uom = "deg_C"

        tempL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=tempL1_ctxt.name,
            parameter_context=tempL1_ctxt.dump(),
            parameter_type="pfunc",
            unit_of_measure=tempL1_ctxt.uom,
        )

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = "(CONDWAT_L0 / 100000) - 0.5"
        cl1_pmap = {"CONDWAT_L0": "CONDWAT_L0"}
        func = NumexprFunction("CONDWAT_L1", cl1_func, cl1_pmap)
        condL1_ctxt = ParameterContext(
            "CONDWAT_L1", param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL
        )
        condL1_ctxt.uom = "S m-1"
        condL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=condL1_ctxt.name,
            parameter_context=condL1_ctxt.dump(),
            parameter_type="pfunc",
            unit_of_measure=condL1_ctxt.uom,
        )

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = "(PRESWAT_L0 * 679.34040721 / (0.85 * 65536)) - (0.05 * 679.34040721)"
        pl1_pmap = {"PRESWAT_L0": "PRESWAT_L0"}
        func = NumexprFunction("PRESWAT_L1", pl1_func, pl1_pmap)
        presL1_ctxt = ParameterContext(
            "PRESWAT_L1", param_type=ParameterFunctionType(function=func), variability=VariabilityEnum.TEMPORAL
        )
        presL1_ctxt.uom = "dbar"
        presL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=presL1_ctxt.name,
            parameter_context=presL1_ctxt.dump(),
            parameter_type="pfunc",
            unit_of_measure=presL1_ctxt.uom,
        )

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = "gsw"
        sal_func = "SP_from_C"
        sal_arglist = [NumexprFunction("CONDWAT_L1*10", "C*10", {"C": "CONDWAT_L1"}), "TEMPWAT_L1", "PRESWAT_L1"]
        sal_kwargmap = None
        func = PythonFunction("PRACSAL", owner, sal_func, sal_arglist, sal_kwargmap)
        sal_ctxt = ParameterContext(
            "PRACSAL", param_type=ParameterFunctionType(func), variability=VariabilityEnum.TEMPORAL
        )
        sal_ctxt.uom = "g kg-1"

        sal_ctxt_id = self.dataset_management.create_parameter_context(
            name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type="pfunc", unit_of_measure=sal_ctxt.uom
        )

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = "gsw"
        abs_sal_func = PythonFunction("abs_sal", owner, "SA_from_SP", ["PRACSAL", "PRESWAT_L1", "lon", "lat"], None)
        # abs_sal_func = PythonFunction('abs_sal', owner, 'SA_from_SP', ['lon','lat'], None)
        cons_temp_func = PythonFunction(
            "cons_temp", owner, "CT_from_t", [abs_sal_func, "TEMPWAT_L1", "PRESWAT_L1"], None
        )
        dens_func = PythonFunction("DENSITY", owner, "rho", [abs_sal_func, cons_temp_func, "PRESWAT_L1"], None)
        dens_ctxt = ParameterContext(
            "DENSITY", param_type=ParameterFunctionType(dens_func), variability=VariabilityEnum.TEMPORAL
        )
        dens_ctxt.uom = "kg m-3"

        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name=dens_ctxt.name,
            parameter_context=dens_ctxt.dump(),
            parameter_type="pfunc",
            unit_of_measure=dens_ctxt.uom,
        )

        ids = [
            t_ctxt_id,
            lat_ctxt_id,
            lon_ctxt_id,
            temp_ctxt_id,
            cond_ctxt_id,
            press_ctxt_id,
            tempL1_ctxt_id,
            condL1_ctxt_id,
            presL1_ctxt_id,
            sal_ctxt_id,
            dens_ctxt_id,
        ]
        contexts = [
            t_ctxt,
            lat_ctxt,
            lon_ctxt,
            temp_ctxt,
            cond_ctxt,
            press_ctxt,
            tempL1_ctxt,
            condL1_ctxt,
            presL1_ctxt,
            sal_ctxt,
            dens_ctxt,
        ]
        context_ids = [ids[i] for i, ctxt in enumerate(contexts) if ctxt.name in filter_values]
        pdict_name = "_".join([ctxt.name for ctxt in contexts if ctxt.name in filter_values])

        pdict_id = self.dataset_management.create_parameter_dictionary(
            pdict_name, parameter_context_ids=context_ids, temporal_context="time"
        )
        return pdict_id
コード例 #21
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):  # Love the non pep-8 convention
        self._start_container()

        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.pids = []
        self.event = Event()
        self.exchange_space_name = "test_granules"
        self.exchange_point_name = "science_data"
        self.i = 0

        self.purge_queues()
        self.queue_buffer = []
        self.streams = []
        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        xn = self.container.ex_manager.create_xn_queue("science_granule_ingestion")
        xn.purge()

    def tearDown(self):
        self.purge_queues()
        for pid in self.pids:
            self.container.proc_manager.terminate_process(pid)
        IngestionManagementIntTest.clean_subscriptions()
        for queue in self.queue_buffer:
            if isinstance(queue, ExchangeNameQueue):
                queue.delete()
            elif isinstance(queue, str):
                xn = self.container.ex_manager.create_xn_queue(queue)
                xn.delete()

    # --------------------------------------------------------------------------------
    # Helper/Utility methods
    # --------------------------------------------------------------------------------

    def create_dataset(self, parameter_dict_id=""):
        """
        Creates a time-series dataset
        """
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
                "ctd_parsed_param_dict", id_only=True
            )

        dataset_id = self.dataset_management.create_dataset(
            "test_dataset_%i" % self.i,
            parameter_dictionary_id=parameter_dict_id,
            spatial_domain=sdom,
            temporal_domain=tdom,
        )
        return dataset_id

    def get_datastore(self, dataset_id):
        """
        Gets an instance of the datastore
            This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes 
            delete a CouchDB datastore and the other containers are unaware of the new state of the datastore.
        """
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore

    def get_ingestion_config(self):
        """
        Grab the ingestion configuration from the resource registry
        """
        # The ingestion configuration should have been created by the bootstrap service
        # which is configured through r2deploy.yml

        ingest_configs, _ = self.resource_registry.find_resources(restype=RT.IngestionConfiguration, id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=""):
        """
        Launch the producer
        """

        pid = self.container.spawn_process(
            "better_data_producer",
            "ion.processes.data.example_data_producer",
            "BetterDataProducer",
            {"process": {"stream_id": stream_id}},
        )

        self.pids.append(pid)

    def make_simple_dataset(self):
        """
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition("ctd data", parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(
            "ctd stream %i" % self.i, "xp1", stream_definition_id=stream_def_id
        )

        dataset_id = self.create_dataset(pdict_id)

        self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self, stream_id, stream_route, offset=0):
        """
        Publish deterministic data
        """

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt["time"] = np.arange(10) + (offset * 10)
        rdt["temp"] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        """
        Make four granules
        """
        for i in xrange(4):
            self.publish_hifi(stream_id, route, i)

    def start_ingestion(self, stream_id, dataset_id):
        """
        Starts ingestion/persistence for a given dataset
        """
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
        )

    def stop_ingestion(self, stream_id):
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id
        )

    def stop_all_ingestion(self):
        try:
            for sid in self.streams:
                self.stop_ingestion(sid)
        except Exception:
            pass

    def validate_granule_subscription(self, msg, route, stream_id):
        """
        Validation for granule format
        """
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info("%s", rdt.pretty_print())
        self.assertIsInstance(msg, Granule, "Message is improperly formatted. (%s)" % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id="", data_size=40):
        """
        Loops until there is a sufficient amount of data in the dataset
        """
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(dataset_id, "time")[0]
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                if rdt["time"] and rdt["time"][0] != rdt._pdict.get_context("time").fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)

    # --------------------------------------------------------------------------------
    # Test Methods
    # --------------------------------------------------------------------------------

    @attr("SMOKE")
    def test_dm_end_2_end(self):
        # --------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        # --------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext("binary", param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context("binary", bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext("records", param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context("records", rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
        )

        stream_definition = self.pubsub_management.create_stream_definition(
            "ctd data", parameter_dictionary_id=pdict_id
        )

        stream_id, route = self.pubsub_management.create_stream(
            "producer", exchange_point=self.exchange_point_name, stream_definition_id=stream_definition
        )

        # --------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - Call persist_data_stream to set up the subscription for the ingestion workers
        #   on the specified stream, which causes the data to be persisted
        # --------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
        )

        # --------------------------------------------------------------------------------
        # Now the granules are being ingested and persisted
        # --------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # --------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        # --------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt["time"][:10] == np.arange(10)).all(), "%s" % rdt["time"][:])
        self.assertTrue((rdt["binary"][:10] == np.array(["hi"] * 10, dtype="object")).all())

        # --------------------------------------------------------------------------------
        # Now to try the streamed approach
        # --------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream(
            "replay_out", exchange_point=self.exchange_point_name, stream_definition_id=stream_definition
        )
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream_id
        )
        log.info("Process ID: %s", process_id)

        replay_client = ReplayClient(process_id)

        # --------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        # --------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), "The process never launched")
        replay_client.start_replay()

        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)

        # --------------------------------------------------------------------------------
        # Test the slicing capabilities
        # --------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={"tdoa": slice(0, 5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt["time"] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b, bool) else b)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @unittest.skip("Doesnt work")
    @attr("LOCOINT")
    @unittest.skipIf(os.getenv("CEI_LAUNCH_TEST", False), "Skip test while in CEI LAUNCH mode")
    def test_replay_pause(self):
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext("binary", param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context("binary", bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext("records", param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context("records", rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
        )

        stream_def_id = self.pubsub_management.create_stream_definition(
            "replay_stream", parameter_dictionary_id=pdict_id
        )
        replay_stream, replay_route = self.pubsub_management.create_stream(
            "replay", "xp1", stream_definition_id=stream_def_id
        )
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        bb = CoverageCraft(scov)
        bb.rdt["time"] = np.arange(100)
        bb.rdt["temp"] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(
            dataset_id, bb.coverage
        )  # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp("xp1")
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there are no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()

    def test_retrieve_and_transform(self):
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            "ctd_parsed_param_dict", id_only=True
        )
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            "sal data", parameter_dictionary_id=salinity_pdict_id
        )

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt["time"] = np.arange(10)
        rdt["temp"] = np.random.randn(10) * 10 + 30
        rdt["conductivity"] = np.random.randn(10) * 2 + 10
        rdt["pressure"] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        rdt["time"] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            "ion.processes.data.transforms.ctd.ctd_L2_salinity",
            "CTDL2SalinityTransformAlgorithm",
            kwargs=dict(params=sal_stream_def_id),
        )
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt["salinity"]:
            self.assertNotEquals(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 20)  # I just need two

        success = False

        def verifier():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt["time"] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verifier)

        self.assertTrue(success)

        success = False

        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)

            rdt = RecordDictionaryTool.load_from_granule(replay_granule)

            comp = rdt["time"] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verify_points)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)
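
    # test_last_granule above relies on a module-level poll(...) helper that is not
    # shown in this excerpt. A minimal sketch of an equivalent helper, assuming the
    # callback returns True on success and is safe to call repeatedly (hypothetical,
    # not part of the original suite):
    def _poll_until(self, verifier, timeout=10, interval=0.5):
        # Repeatedly invoke verifier() until it returns a truthy value or the
        # timeout elapses; gevent.Timeout(..., False) exits the block silently.
        with gevent.Timeout(timeout, False):
            while True:
                if verifier():
                    return True
                gevent.sleep(interval)
        return False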

    def test_replay_with_parameters(self):
        # --------------------------------------------------------------------------------
        # Create the configurations and the dataset
        # --------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext("binary", param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context("binary", bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext("records", param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context("records", rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
        )

        stream_def_id = self.pubsub_management.create_stream_definition(
            "replay_stream", parameter_dictionary_id=pdict_id
        )

        stream_id, route = self.pubsub_management.create_stream(
            "replay_with_params", exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id
        )
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        # --------------------------------------------------------------------------------
        # Coerce the datastore into existence (avoids a race condition)
        # --------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        query = {
            "start_time": 0 - 2208988800,
            "end_time": 20 - 2208988800,
            "stride_time": 2,
            "parameters": ["time", "temp"],
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0, 20, 2) == rdt["time"]
        self.assertTrue(comp.all(), "%s" % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(["time", "temp"]))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=["time", "temp"])
        self.assertTrue(extents["time"] >= 20)
        self.assertTrue(extents["temp"] >= 20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )
        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt["time"] == np.arange(0, 40)
                if not isinstance(comp, bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_correct_time(self):

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        # the offset between Unix time and NTP time
        unix_now = np.floor(time.time())
        ntp_now = unix_now + 2208988800

        unix_ago = unix_now - 20
        ntp_ago = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        coverage.insert_timesteps(20)
        coverage.set_parameter_values("time", np.arange(ntp_ago, ntp_now))

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)
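
    # The NTP epoch (1900-01-01) precedes the Unix epoch (1970-01-01) by exactly
    # 2208988800 seconds, which is the constant added and subtracted above and in
    # test_replay_with_parameters. A pair of hypothetical converters (not part of
    # the original suite) makes the relationship explicit:
    @staticmethod
    def unix_to_ntp(unix_seconds):
        # e.g. unix_to_ntp(0) == 2208988800, i.e. 1970-01-01 expressed in NTP seconds
        return unix_seconds + 2208988800

    @staticmethod
    def ntp_to_unix(ntp_seconds):
        return ntp_seconds - 2208988800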

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_empty_coverage_time(self):

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
        self.assertEquals([coverage.get_parameter_context("time").fill_value] * 2, temporal_bounds)

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt["time"] == np.arange(40)).all())

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values("time", np.arange(10))
            coverage.set_parameter_values("temp", np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())

    @unittest.skip("Outdated due to ingestion retry")
    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_ingestion_failover(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception")
        sub.start()

        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        file_path = DatasetManagementService._get_coverage_path(dataset_id)
        master_file = os.path.join(file_path, "%s_master.hdf5" % dataset_id)

        with open(master_file, "w") as f:
            f.write("this will crash HDF")

        self.publish_hifi(stream_id, route, 5)

        self.assertTrue(event.wait(10))

        sub.stop()
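
For reference, the two retrieval query styles exercised in the example above reduce to the following sketch. It reuses identifiers from the tests (data_retriever, dataset_id) as a condensed illustration rather than additional API surface; the constant 2208988800 is the Unix/NTP epoch offset noted in test_correct_time.

# Index-based slicing over the temporal dimension, as in test_dm_end_2_end:
granule = data_retriever.retrieve(dataset_id=dataset_id, query={"tdoa": slice(0, 5)})

# Bounded, strided, parameter-filtered retrieval, as in test_replay_with_parameters:
query = {
    "start_time": 0 - 2208988800,    # lower bound, shifted by the Unix/NTP epoch offset
    "end_time": 20 - 2208988800,     # upper bound
    "stride_time": 2,                # keep every other sample
    "parameters": ["time", "temp"],  # restrict the returned fields
}
granule = data_retriever.retrieve(dataset_id=dataset_id, query=query)
rdt = RecordDictionaryTool.load_from_granule(granule)
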
Code Example #22
0
class PubsubManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

        self.pdicts = {}
        self.queue_cleanup = list()
        self.exchange_cleanup = list()

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    def test_stream_def_crud(self):

        # Test Creation
        pdict = DatasetManagementService.get_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        stream_definition_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict.identifier)

        # Make sure there is an assoc
        self.assertTrue(
            self.resource_registry.find_associations(
                subject=stream_definition_id,
                predicate=PRED.hasParameterDictionary,
                object=pdict.identifier,
                id_only=True))

        # Test Reading
        stream_definition = self.pubsub_management.read_stream_definition(
            stream_definition_id)
        self.assertTrue(
            PubsubManagementService._compare_pdicts(
                pdict.dump(), stream_definition.parameter_dictionary))

        # Test Deleting
        self.pubsub_management.delete_stream_definition(stream_definition_id)
        self.assertFalse(
            self.resource_registry.find_associations(
                subject=stream_definition_id,
                predicate=PRED.hasParameterDictionary,
                object=pdict.identifier,
                id_only=True))

        # Test comparisons
        in_stream_definition_id = self.pubsub_management.create_stream_definition(
            'L0 products',
            parameter_dictionary_id=pdict.identifier,
            available_fields=['time', 'temp', 'conductivity', 'pressure'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        in_stream_definition_id)

        out_stream_definition_id = in_stream_definition_id
        self.assertTrue(
            self.pubsub_management.compare_stream_definition(
                in_stream_definition_id, out_stream_definition_id))
        self.assertTrue(
            self.pubsub_management.compatible_stream_definitions(
                in_stream_definition_id, out_stream_definition_id))

        out_stream_definition_id = self.pubsub_management.create_stream_definition(
            'L2 Products',
            parameter_dictionary_id=pdict.identifier,
            available_fields=['time', 'salinity', 'density'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        out_stream_definition_id)
        self.assertFalse(
            self.pubsub_management.compare_stream_definition(
                in_stream_definition_id, out_stream_definition_id))

        self.assertTrue(
            self.pubsub_management.compatible_stream_definitions(
                in_stream_definition_id, out_stream_definition_id))

    def test_validate_stream_defs(self):
        #test no input
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = []
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_0',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_0',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test input with no output
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_1',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_1',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test available field missing parameter context definition -- missing PRESWAT_L0
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = ['DENSITY']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_2',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_2',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test l1 from l0
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_3',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_3',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test l2 from l0
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_4',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_4',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test Ln from L0
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = [
            'DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'
        ]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_5',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_5',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test L2 from L1
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'
        ]
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_6',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_6',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test L1 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(
            ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_7',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_7',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test L2 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_8',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_8',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test L2 from L0 missing L1
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_9',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_9',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

    def publish_on_stream(self, stream_id, msg):
        stream = self.pubsub_management.read_stream(stream_id)
        stream_route = stream.stream_route
        publisher = StandaloneStreamPublisher(stream_id=stream_id,
                                              stream_route=stream_route)
        publisher.publish(msg)

    def test_stream_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition(
            'test_definition', stream_type='stream')
        topic_id = self.pubsub_management.create_topic(
            name='test_topic', exchange_point='test_exchange')
        self.exchange_cleanup.append('test_exchange')
        topic2_id = self.pubsub_management.create_topic(
            name='another_topic', exchange_point='outside')
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream',
            topic_ids=[topic_id, topic2_id],
            exchange_point='test_exchange',
            stream_definition_id=stream_def_id)

        topics, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertEquals(topics, [topic_id])

        defs, assocs = self.resource_registry.find_objects(
            subject=stream_id,
            predicate=PRED.hasStreamDefinition,
            id_only=True)
        self.assertTrue(len(defs))

        stream = self.pubsub_management.read_stream(stream_id)
        self.assertEquals(stream.name, 'test_stream')
        self.pubsub_management.delete_stream(stream_id)

        with self.assertRaises(NotFound):
            self.pubsub_management.read_stream(stream_id)

        defs, assocs = self.resource_registry.find_objects(
            subject=stream_id,
            predicate=PRED.hasStreamDefinition,
            id_only=True)
        self.assertFalse(len(defs))

        topics, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertFalse(len(topics))

        self.pubsub_management.delete_topic(topic_id)
        self.pubsub_management.delete_topic(topic2_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_subscription_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition(
            'test_definition', stream_type='stream')
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream',
            exchange_point='test_exchange',
            stream_definition_id=stream_def_id)
        subscription_id = self.pubsub_management.create_subscription(
            name='test subscription',
            stream_ids=[stream_id],
            exchange_name='test_queue')
        self.exchange_cleanup.append('test_exchange')

        subs, assocs = self.resource_registry.find_objects(
            subject=subscription_id, predicate=PRED.hasStream, id_only=True)
        self.assertEquals(subs, [stream_id])

        res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName,
                                                       name='test_queue',
                                                       id_only=True)
        self.assertEquals(len(res), 1)

        subs, assocs = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertEquals(subs[0], res[0])

        subscription = self.pubsub_management.read_subscription(
            subscription_id)
        self.assertEquals(subscription.exchange_name, 'test_queue')

        self.pubsub_management.delete_subscription(subscription_id)

        subs, assocs = self.resource_registry.find_objects(
            subject=subscription_id, predicate=PRED.hasStream, id_only=True)
        self.assertFalse(len(subs))

        subs, assocs = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertFalse(len(subs))

        self.pubsub_management.delete_stream(stream_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_move_before_activate(self):
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream', exchange_point='test_xp')

        #--------------------------------------------------------------------------------
        # Test moving before activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription(
            'first_queue', stream_ids=[stream_id])

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.pubsub_management.move_subscription(subscription_id,
                                                 exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)

        self.assertEquals(len(subjects), 1)
        self.assertEquals(subjects[0], xn_ids[0])

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_move_activated_subscription(self):

        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream', exchange_point='test_xp')
        #--------------------------------------------------------------------------------
        # Test moving after activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription(
            'first_queue', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m, r, s):
            self.assertEquals(m, 'verified')
            self.verified.set()

        subscriber = StandaloneStreamSubscriber('second_queue', verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id,
                                                 exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)

        self.assertEquals(len(subjects), 1)
        self.assertEquals(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('verified')

        self.assertTrue(self.verified.wait(2))

        self.pubsub_management.deactivate_subscription(subscription_id)

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_queue_cleanup(self):
        stream_id, route = self.pubsub_management.create_stream(
            'test_stream', 'xp1')
        xn_objs, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        for xn_obj in xn_objs:
            xn = self.container.ex_manager.create_xn_queue(xn_obj.name)
            xn.delete()
        subscription_id = self.pubsub_management.create_subscription(
            'queue1', stream_ids=[stream_id])
        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids), 1)

        self.pubsub_management.delete_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids), 0)

    def test_activation_and_deactivation(self):
        stream_id, route = self.pubsub_management.create_stream(
            'stream1', 'xp1')
        subscription_id = self.pubsub_management.create_subscription(
            'sub1', stream_ids=[stream_id])

        self.check1 = Event()

        def verifier(m, r, s):
            self.check1.set()

        subscriber = StandaloneStreamSubscriber('sub1', verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('should not receive')

        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        self.pubsub_management.deactivate_subscription(subscription_id)

        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_topic_crud(self):

        topic_id = self.pubsub_management.create_topic(
            name='test_topic', exchange_point='test_xp')
        self.exchange_cleanup.append('test_xp')

        topic = self.pubsub_management.read_topic(topic_id)

        self.assertEquals(topic.name, 'test_topic')
        self.assertEquals(topic.exchange_point, 'test_xp')

        self.pubsub_management.delete_topic(topic_id)
        with self.assertRaises(NotFound):
            self.pubsub_management.read_topic(topic_id)

    def test_full_pubsub(self):

        self.sub1_sat = Event()
        self.sub2_sat = Event()

        def subscriber1(m, r, s):
            self.sub1_sat.set()

        def subscriber2(m, r, s):
            self.sub2_sat.set()

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        self.queue_cleanup.append(sub1.xn.queue)
        sub1.start()

        sub2 = StandaloneStreamSubscriber('sub2', subscriber2)
        self.queue_cleanup.append(sub2.xn.queue)
        sub2.start()

        log_topic = self.pubsub_management.create_topic(
            'instrument_logs', exchange_point='instruments')
        science_topic = self.pubsub_management.create_topic(
            'science_data', exchange_point='instruments')
        events_topic = self.pubsub_management.create_topic(
            'notifications', exchange_point='events')

        log_stream, route = self.pubsub_management.create_stream(
            'instrument1-logs',
            topic_ids=[log_topic],
            exchange_point='instruments')
        ctd_stream, route = self.pubsub_management.create_stream(
            'instrument1-ctd',
            topic_ids=[science_topic],
            exchange_point='instruments')
        event_stream, route = self.pubsub_management.create_stream(
            'notifications', topic_ids=[events_topic], exchange_point='events')
        raw_stream, route = self.pubsub_management.create_stream(
            'temp', exchange_point='global.data')
        self.exchange_cleanup.extend(['instruments', 'events', 'global.data'])

        subscription1 = self.pubsub_management.create_subscription(
            'subscription1',
            stream_ids=[log_stream, event_stream],
            exchange_name='sub1')
        subscription2 = self.pubsub_management.create_subscription(
            'subscription2',
            exchange_points=['global.data'],
            stream_ids=[ctd_stream],
            exchange_name='sub2')

        self.pubsub_management.activate_subscription(subscription1)
        self.pubsub_management.activate_subscription(subscription2)

        self.publish_on_stream(log_stream, 1)
        self.assertTrue(self.sub1_sat.wait(4))
        self.assertFalse(self.sub2_sat.is_set())

        self.publish_on_stream(raw_stream, 1)
        self.assertTrue(self.sub1_sat.wait(4))

        sub1.stop()
        sub2.stop()

    def test_topic_craziness(self):

        self.msg_queue = Queue()

        def subscriber1(m, r, s):
            self.msg_queue.put(m)

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        self.queue_cleanup.append(sub1.xn.queue)
        sub1.start()

        topic1 = self.pubsub_management.create_topic('topic1',
                                                     exchange_point='xp1')
        topic2 = self.pubsub_management.create_topic('topic2',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic1)
        topic3 = self.pubsub_management.create_topic('topic3',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic1)
        topic4 = self.pubsub_management.create_topic('topic4',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic2)
        topic5 = self.pubsub_management.create_topic('topic5',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic2)
        topic6 = self.pubsub_management.create_topic('topic6',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic3)
        topic7 = self.pubsub_management.create_topic('topic7',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic3)

        # Tree 2
        topic8 = self.pubsub_management.create_topic('topic8',
                                                     exchange_point='xp2')
        topic9 = self.pubsub_management.create_topic('topic9',
                                                     exchange_point='xp2',
                                                     parent_topic_id=topic8)
        topic10 = self.pubsub_management.create_topic('topic10',
                                                      exchange_point='xp2',
                                                      parent_topic_id=topic9)
        topic11 = self.pubsub_management.create_topic('topic11',
                                                      exchange_point='xp2',
                                                      parent_topic_id=topic9)
        topic12 = self.pubsub_management.create_topic('topic12',
                                                      exchange_point='xp2',
                                                      parent_topic_id=topic11)
        topic13 = self.pubsub_management.create_topic('topic13',
                                                      exchange_point='xp2',
                                                      parent_topic_id=topic11)
        self.exchange_cleanup.extend(['xp1', 'xp2'])

        stream1_id, route = self.pubsub_management.create_stream(
            'stream1',
            topic_ids=[topic7, topic4, topic5],
            exchange_point='xp1')
        stream2_id, route = self.pubsub_management.create_stream(
            'stream2', topic_ids=[topic8], exchange_point='xp2')
        stream3_id, route = self.pubsub_management.create_stream(
            'stream3', topic_ids=[topic10, topic13], exchange_point='xp2')
        stream4_id, route = self.pubsub_management.create_stream(
            'stream4', topic_ids=[topic9], exchange_point='xp2')
        stream5_id, route = self.pubsub_management.create_stream(
            'stream5', topic_ids=[topic11], exchange_point='xp2')

        subscription1 = self.pubsub_management.create_subscription(
            'sub1', topic_ids=[topic1])
        subscription2 = self.pubsub_management.create_subscription(
            'sub2', topic_ids=[topic8], exchange_name='sub1')
        subscription3 = self.pubsub_management.create_subscription(
            'sub3', topic_ids=[topic9], exchange_name='sub1')
        subscription4 = self.pubsub_management.create_subscription(
            'sub4',
            topic_ids=[topic10, topic13, topic11],
            exchange_name='sub1')
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription1)

        self.publish_on_stream(stream1_id, 1)

        self.assertEquals(self.msg_queue.get(timeout=10), 1)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription1)
        self.pubsub_management.delete_subscription(subscription1)
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription2)

        self.publish_on_stream(stream2_id, 2)
        self.assertEquals(self.msg_queue.get(timeout=10), 2)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription2)
        self.pubsub_management.delete_subscription(subscription2)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription3)

        self.publish_on_stream(stream2_id, 3)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream3_id, 4)
        self.assertEquals(self.msg_queue.get(timeout=10), 4)

        self.pubsub_management.deactivate_subscription(subscription3)
        self.pubsub_management.delete_subscription(subscription3)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription4)

        self.publish_on_stream(stream4_id, 5)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream5_id, 6)
        self.assertEquals(self.msg_queue.get(timeout=10), 6)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.pubsub_management.deactivate_subscription(subscription4)
        self.pubsub_management.delete_subscription(subscription4)

        #--------------------------------------------------------------------------------
        sub1.stop()

        self.pubsub_management.delete_topic(topic13)
        self.pubsub_management.delete_topic(topic12)
        self.pubsub_management.delete_topic(topic11)
        self.pubsub_management.delete_topic(topic10)
        self.pubsub_management.delete_topic(topic9)
        self.pubsub_management.delete_topic(topic8)
        self.pubsub_management.delete_topic(topic7)
        self.pubsub_management.delete_topic(topic6)
        self.pubsub_management.delete_topic(topic5)
        self.pubsub_management.delete_topic(topic4)
        self.pubsub_management.delete_topic(topic3)
        self.pubsub_management.delete_topic(topic2)
        self.pubsub_management.delete_topic(topic1)

        self.pubsub_management.delete_stream(stream1_id)
        self.pubsub_management.delete_stream(stream2_id)
        self.pubsub_management.delete_stream(stream3_id)
        self.pubsub_management.delete_stream(stream4_id)
        self.pubsub_management.delete_stream(stream5_id)

    def _get_pdict(self, filter_values):
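        """
        Build (or return a cached) parameter dictionary containing only the
        contexts whose names appear in filter_values; results are cached in
        self.pdicts, keyed by the joined context names.
        """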
        t_ctxt = ParameterContext(
            'TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(
            name='TIME',
            parameter_context=t_ctxt.dump(),
            parameter_type='quantity<int64>',
            unit_of_measure=t_ctxt.uom)

        lat_ctxt = ParameterContext(
            'LAT',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(
            name='LAT',
            parameter_context=lat_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=lat_ctxt.uom)

        lon_ctxt = ParameterContext(
            'LON',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(
            name='LON',
            parameter_context=lon_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=lon_ctxt.uom)

        # Independent Parameters
        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(
            'TEMPWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(
            name='TEMPWAT_L0',
            parameter_context=temp_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=temp_ctxt.uom)

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(
            'CONDWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(
            name='CONDWAT_L0',
            parameter_context=cond_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=cond_ctxt.uom)

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(
            'PRESWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(
            name='PRESWAT_L0',
            parameter_context=press_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=press_ctxt.uom)

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        tl1_pmap = {'T': 'TEMPWAT_L0'}
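        # param_map binds the free variable 'T' in the numexpr expression to the
        # TEMPWAT_L0 parameter of the dictionary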
        expr = NumexprFunction('TEMPWAT_L1',
                               tl1_func, ['T'],
                               param_map=tl1_pmap)
        tempL1_ctxt = ParameterContext(
            'TEMPWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=tempL1_ctxt.name,
            parameter_context=tempL1_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=tempL1_ctxt.uom)

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr = NumexprFunction('CONDWAT_L1',
                               cl1_func, ['C'],
                               param_map=cl1_pmap)
        condL1_ctxt = ParameterContext(
            'CONDWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=condL1_ctxt.name,
            parameter_context=condL1_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=condL1_ctxt.uom)

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr = NumexprFunction('PRESWAT_L1',
                               pl1_func, ['P', 'p_range'],
                               param_map=pl1_pmap)
        presL1_ctxt = ParameterContext(
            'PRESWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=presL1_ctxt.name,
            parameter_context=presL1_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=presL1_ctxt.uom)

        # Density & practical salinity calucluated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        sal_pmap = {
            'C':
            NumexprFunction('CONDWAT_L1*10',
                            'C*10', ['C'],
                            param_map={'C': 'CONDWAT_L1'}),
            't':
            'TEMPWAT_L1',
            'p':
            'PRESWAT_L1'
        }
        sal_kwargmap = None
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist,
                              sal_kwargmap, sal_pmap)
        sal_ctxt = ParameterContext('PRACSAL',
                                    param_type=ParameterFunctionType(expr),
                                    variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(
            name=sal_ctxt.name,
            parameter_context=sal_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=sal_ctxt.uom)

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP',
                                      ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction(
            'cons_temp', owner, 'CT_from_t',
            [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction(
            'DENSITY', owner, 'rho',
            [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
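        # PythonFunction arguments may themselves be function expressions: abs_sal_expr
        # and cons_temp_expr are evaluated first and their results fed to gsw.rho
        # together with PRESWAT_L1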
        dens_ctxt = ParameterContext(
            'DENSITY',
            param_type=ParameterFunctionType(dens_expr),
            variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name=dens_ctxt.name,
            parameter_context=dens_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=dens_ctxt.uom)

        ids = [
            t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id,
            press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id,
            sal_ctxt_id, dens_ctxt_id
        ]
        contexts = [
            t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt,
            tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt
        ]
        context_ids = [
            ids[i] for i, ctxt in enumerate(contexts)
            if ctxt.name in filter_values
        ]
        pdict_name = '_'.join(
            [ctxt.name for ctxt in contexts if ctxt.name in filter_values])

        try:
            return self.pdicts[pdict_name]
        except KeyError:
            pdict_id = self.dataset_management.create_parameter_dictionary(
                pdict_name,
                parameter_context_ids=context_ids,
                temporal_context='time')
            self.pdicts[pdict_name] = pdict_id
            return pdict_id
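
For reference, the three L0-to-L1 conversions defined above reduce to simple array arithmetic. The snippet below is a minimal standalone sketch (the raw counts are invented for illustration) of what the NumexprFunction expressions compute:

import numpy as np

TEMPWAT_L0 = np.array([250000.0, 251234.0], dtype=np.float32)   # raw counts (made up)
TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10                          # -> deg_C

CONDWAT_L0 = np.array([520000.0, 530000.0], dtype=np.float32)
CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5                        # -> S m-1

p_range = 679.34040721                                          # fixed calibration coefficient
PRESWAT_L0 = np.array([5000.0, 6000.0], dtype=np.float32)
PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)  # -> dbar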
Code Example #23
    def _setup_resources(self):
        # TODO: some or all of this (or some variation) should move to DAMS

        # Build the test resources for the dataset
        dms_cli = DatasetManagementServiceClient()
        dams_cli = DataAcquisitionManagementServiceClient()
        dpms_cli = DataProductManagementServiceClient()
        rr_cli = ResourceRegistryServiceClient()
        pubsub_cli = PubsubManagementServiceClient()

        eda = ExternalDatasetAgent(handler_module=self.DVR_CONFIG['dvr_mod'],
            handler_class=self.DVR_CONFIG['dvr_cls'])
        eda_id = dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance()
        eda_inst_id = dams_cli.create_external_dataset_agent_instance(eda_inst, external_dataset_agent_id=eda_id)

        # Create and register the necessary resources/objects

        # Create DataProvider
        dprov = ExternalDataProvider(institution=Institution(), contact=ContactInformation())
        dprov.contact.individual_names_given = 'Christopher Mueller'
        dprov.contact.email = '*****@*****.**'

        # Create DataSource
        dsrc = DataSource(protocol_type='FILE', institution=Institution(), contact=ContactInformation())
        dsrc.connection_params['base_data_url'] = ''
        dsrc.contact.individual_names_given = 'Tim Giguere'
        dsrc.contact.email = '*****@*****.**'

        # Create ExternalDataset
        ds_name = 'slocum_test_dataset'
        dset = ExternalDataset(name=ds_name, dataset_description=DatasetDescription(), update_description=UpdateDescription(), contact=ContactInformation())

        dset.dataset_description.parameters['base_url'] = 'test_data/slocum/'
        dset.dataset_description.parameters['list_pattern'] = 'ru05-2012-021-0-0-sbd.dat'
        dset.dataset_description.parameters['date_pattern'] = '%Y %j'
        dset.dataset_description.parameters['date_extraction_pattern'] = 'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat'
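        # The handler is expected to pull a timestamp out of each matching file name by
        # applying date_extraction_pattern and then parsing the captured groups with
        # date_pattern, roughly (illustrative only):
        #   m = re.search(r'ru05-([\d]{4})-([\d]{3})-\d-\d-sbd.dat', 'ru05-2012-021-0-0-sbd.dat')
        #   datetime.strptime(' '.join(m.groups()), '%Y %j')  # -> 2012, day-of-year 021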
        dset.dataset_description.parameters['temporal_dimension'] = None
        dset.dataset_description.parameters['zonal_dimension'] = None
        dset.dataset_description.parameters['meridional_dimension'] = None
        dset.dataset_description.parameters['vertical_dimension'] = None
        dset.dataset_description.parameters['variables'] = [
            'c_wpt_y_lmc',
            'sci_water_cond',
            'm_y_lmc',
            'u_hd_fin_ap_inflection_holdoff',
            'sci_m_present_time',
            'm_leakdetect_voltage_forward',
            'sci_bb3slo_b660_scaled',
            'c_science_send_all',
            'm_gps_status',
            'm_water_vx',
            'm_water_vy',
            'c_heading',
            'sci_fl3slo_chlor_units',
            'u_hd_fin_ap_gain',
            'm_vacuum',
            'u_min_water_depth',
            'm_gps_lat',
            'm_veh_temp',
            'f_fin_offset',
            'u_hd_fin_ap_hardover_holdoff',
            'c_alt_time',
            'm_present_time',
            'm_heading',
            'sci_bb3slo_b532_scaled',
            'sci_fl3slo_cdom_units',
            'm_fin',
            'x_cycle_overrun_in_ms',
            'sci_water_pressure',
            'u_hd_fin_ap_igain',
            'sci_fl3slo_phyco_units',
            'm_battpos',
            'sci_bb3slo_b470_scaled',
            'm_lat',
            'm_gps_lon',
            'sci_ctd41cp_timestamp',
            'm_pressure',
            'c_wpt_x_lmc',
            'c_ballast_pumped',
            'x_lmc_xy_source',
            'm_lon',
            'm_avg_speed',
            'sci_water_temp',
            'u_pitch_ap_gain',
            'm_roll',
            'm_tot_num_inflections',
            'm_x_lmc',
            'u_pitch_ap_deadband',
            'm_final_water_vy',
            'm_final_water_vx',
            'm_water_depth',
            'm_leakdetect_voltage',
            'u_pitch_max_delta_battpos',
            'm_coulomb_amphr',
            'm_pitch', ]

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name='slocum_model')
        #        dsrc_model.model = 'SLOCUM'
        dsrc_model.data_handler_module = 'N/A'
        dsrc_model.data_handler_class = 'N/A'

        ## Run everything through DAMS
        ds_id = dams_cli.create_external_dataset(external_dataset=dset)
        ext_dprov_id = dams_cli.create_external_data_provider(external_data_provider=dprov)
        ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = dams_cli.register_external_data_set(external_dataset_id=ds_id)

        # Or using each method
        dams_cli.assign_data_source_to_external_data_provider(data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        dams_cli.assign_data_source_to_data_model(data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        dams_cli.assign_external_dataset_to_data_source(external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        dams_cli.assign_external_dataset_to_agent_instance(external_dataset_id=ds_id, agent_instance_id=eda_inst_id)
        #        dams_cli.assign_external_data_agent_to_agent_instance(external_data_agent_id=self.eda_id, agent_instance_id=self.eda_inst_id)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in self._create_parameter_dictionary().iteritems():
            pc_list.append(dms_cli.create_parameter_context(pc_k, pc[1].dump()))

        pdict_id = dms_cli.create_parameter_dictionary('slocum_param_dict', pc_list)

        streamdef_id = pubsub_cli.create_stream_definition(name="slocum_stream_def", description="stream def for slocum testing", parameter_dictionary_id=pdict_id)

        #        dpms_cli.create_data_product()

        # Generate the data product and associate it to the ExternalDataset

        tdom, sdom = time_series_domain()
        tdom, sdom = tdom.dump(), sdom.dump()

        dprod = IonObject(RT.DataProduct,
            name='slocum_parsed_product',
            description='parsed slocum product',
            temporal_domain=tdom,
            spatial_domain=sdom)

        dproduct_id = dpms_cli.create_data_product(data_product=dprod,
            stream_definition_id=streamdef_id)

        dams_cli.assign_data_product(input_resource_id=ds_id, data_product_id=dproduct_id)

        stream_id, assn = rr_cli.find_objects(subject=dproduct_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        stream_id = stream_id[0]

        log.info('Created resources: {0}'.format({'ExternalDataset': ds_id, 'ExternalDataProvider': ext_dprov_id, 'DataSource': ext_dsrc_id, 'DataSourceModel': ext_dsrc_model_id, 'DataProducer': dproducer_id, 'DataProduct': dproduct_id, 'Stream': stream_id}))

        # Create the logger for receiving publications
        _, stream_route, _ = self.create_stream_and_logger(name='slocum', stream_id=stream_id)

        self.EDA_RESOURCE_ID = ds_id
        self.EDA_NAME = ds_name
        self.DVR_CONFIG['dh_cfg'] = {
            'TESTING': True,
            'stream_id': stream_id,
            'stream_route': stream_route,
            'stream_def': streamdef_id,
            'external_dataset_res': dset,
            'data_producer_id': dproducer_id,  # CBM: Should this be put in the main body of the config - with mod & cls?
            'max_records': 20,
            }
Code Example #24
    def _setup_resources(self):
        # TODO: some or all of this (or some variation) should move to DAMS

        # Build the test resources for the dataset
        dms_cli = DatasetManagementServiceClient()
        dams_cli = DataAcquisitionManagementServiceClient()
        dpms_cli = DataProductManagementServiceClient()
        rr_cli = ResourceRegistryServiceClient()
        pubsub_cli = PubsubManagementServiceClient()

        eda = ExternalDatasetAgent(name='example data agent',
                                   handler_module=self.DVR_CONFIG['dvr_mod'],
                                   handler_class=self.DVR_CONFIG['dvr_cls'])
        eda_id = dams_cli.create_external_dataset_agent(eda)

        eda_inst = ExternalDatasetAgentInstance(
            name='example dataset agent instance')
        eda_inst_id = dams_cli.create_external_dataset_agent_instance(
            eda_inst, external_dataset_agent_id=eda_id)

        # Create and register the necessary resources/objects

        # Create DataProvider
        dprov = ExternalDataProvider(name='example data provider',
                                     institution=Institution(),
                                     contact=ContactInformation())
        dprov.contact.individual_names_given = 'Christopher Mueller'
        dprov.contact.email = '*****@*****.**'

        # Create DataSource
        dsrc = DataSource(name='example datasource',
                          protocol_type='FILE',
                          institution=Institution(),
                          contact=ContactInformation())
        dsrc.connection_params['base_data_url'] = ''
        dsrc.contact.individual_names_given = 'Tim Giguere'
        dsrc.contact.email = '*****@*****.**'

        # Create ExternalDataset
        ds_name = 'ruv_test_dataset'
        dset = ExternalDataset(name=ds_name,
                               dataset_description=DatasetDescription(),
                               update_description=UpdateDescription(),
                               contact=ContactInformation())

        dset.dataset_description.parameters['base_url'] = 'test_data/ruv/'
        dset.dataset_description.parameters[
            'list_pattern'] = 'RDLi_SEAB_2011_08_24_1600.ruv'
        dset.dataset_description.parameters['date_pattern'] = '%Y %m %d %H %M'
        dset.dataset_description.parameters[
            'date_extraction_pattern'] = 'RDLi_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv'
        dset.dataset_description.parameters['temporal_dimension'] = None
        dset.dataset_description.parameters['zonal_dimension'] = None
        dset.dataset_description.parameters['meridional_dimension'] = None
        dset.dataset_description.parameters['vertical_dimension'] = None
        dset.dataset_description.parameters['variables'] = []

        # Create DataSourceModel
        dsrc_model = DataSourceModel(name='ruv_model')
        #dsrc_model.model = 'RUV'
        dsrc_model.data_handler_module = 'N/A'
        dsrc_model.data_handler_class = 'N/A'

        ## Run everything through DAMS
        ds_id = dams_cli.create_external_dataset(external_dataset=dset)
        ext_dprov_id = dams_cli.create_external_data_provider(
            external_data_provider=dprov)
        ext_dsrc_id = dams_cli.create_data_source(data_source=dsrc)
        ext_dsrc_model_id = dams_cli.create_data_source_model(dsrc_model)

        # Register the ExternalDataset
        dproducer_id = dams_cli.register_external_data_set(
            external_dataset_id=ds_id)

        # Or using each method
        dams_cli.assign_data_source_to_external_data_provider(
            data_source_id=ext_dsrc_id, external_data_provider_id=ext_dprov_id)
        dams_cli.assign_data_source_to_data_model(
            data_source_id=ext_dsrc_id, data_source_model_id=ext_dsrc_model_id)
        dams_cli.assign_external_dataset_to_data_source(
            external_dataset_id=ds_id, data_source_id=ext_dsrc_id)
        dams_cli.assign_external_dataset_to_agent_instance(
            external_dataset_id=ds_id, agent_instance_id=eda_inst_id)

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext(
            'data',
            param_type=QuantityType(value_encoding=numpy.dtype('int64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1970'
        pdict.add_context(t_ctxt)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
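            # pc appears to be an (ordinal, ParameterContext) pair here, hence pc[1].dump()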
            pc_list.append(dms_cli.create_parameter_context(
                pc_k, pc[1].dump()))

        pdict_id = dms_cli.create_parameter_dictionary('ruv_param_dict',
                                                       pc_list)

        streamdef_id = pubsub_cli.create_stream_definition(
            name="ruv",
            description="stream def for ruv testing",
            parameter_dictionary_id=pdict_id)

        dprod = IonObject(RT.DataProduct,
                          name='ruv_parsed_product',
                          description='parsed ruv product')

        # Generate the data product and associate it to the ExternalDataset
        dproduct_id = dpms_cli.create_data_product(
            data_product=dprod, stream_definition_id=streamdef_id)

        dams_cli.assign_data_product(input_resource_id=ds_id,
                                     data_product_id=dproduct_id)

        stream_id, assn = rr_cli.find_objects(subject=dproduct_id,
                                              predicate=PRED.hasStream,
                                              object_type=RT.Stream,
                                              id_only=True)
        stream_id = stream_id[0]

        log.info('Created resources: {0}'.format({
            'ExternalDataset': ds_id,
            'ExternalDataProvider': ext_dprov_id,
            'DataSource': ext_dsrc_id,
            'DataSourceModel': ext_dsrc_model_id,
            'DataProducer': dproducer_id,
            'DataProduct': dproduct_id,
            'Stream': stream_id
        }))

        #CBM: Eventually, probably want to group this crap somehow - not sure how yet...

        # Create the logger for receiving publications
        _, stream_route, _ = self.create_stream_and_logger(name='ruv',
                                                           stream_id=stream_id)

        self.EDA_RESOURCE_ID = ds_id
        self.EDA_NAME = ds_name
        self.DVR_CONFIG['dh_cfg'] = {
            'TESTING': True,
            'stream_id': stream_id,
            'stream_route': stream_route,
            'external_dataset_res': dset,
            'param_dictionary': pdict.dump(),
            'data_producer_id':
            dproducer_id,  # CBM: Should this be put in the main body of the config - with mod & cls?
            'max_records': 20,
        }
Code Example #25
class TestCTDPChain(IonIntegrationTestCase):
    def setUp(self):
        super(TestCTDPChain, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub            = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0
        self.cnt = 0

        # Cleanup of queue created by the subscriber
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):
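        """
        Build a granule of the given length: 'time' gets sequential values starting
        at self.i, and every QuantityType parameter is filled with random floats in [0, 75).
        """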

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g

    def clean_queues(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()

    def cleaning_operations(self):
        for dproc_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(dproc_id)

    def test_ctdp_chain(self):
        """
        Test that packets are processed by a chain of CTDP transforms: L0, L1 and L2
        """

        #-------------------------------------------------------------------------------------
        # Prepare the stream def to be used for transform chain
        #-------------------------------------------------------------------------------------

        #todo Check whether the right parameter dictionary is being used
        self._prepare_stream_def_for_transform_chain()

        #-------------------------------------------------------------------------------------
        # Prepare the data proc defs and in and out data products for the transforms
        #-------------------------------------------------------------------------------------

        # list_args_L0 = [data_proc_def_id, input_dpod_id, output_dpod_id]
        list_args_L0 = self._prepare_things_you_need_to_launch_transform(name_of_transform='L0')

        list_args_L1 = self._prepare_things_you_need_to_launch_transform(name_of_transform='L1')

        list_args_L2_density = self._prepare_things_you_need_to_launch_transform(name_of_transform='L2_density')

        list_args_L2_salinity = self._prepare_things_you_need_to_launch_transform(name_of_transform='L2_salinity')

        log.debug("Got the following args: L0 = %s, L1 = %s, L2 density = %s, L2 salinity = %s", list_args_L0, list_args_L1, list_args_L2_density, list_args_L2_salinity )

        #-------------------------------------------------------------------------------------
        # Launch the CTDP transforms
        #-------------------------------------------------------------------------------------

        L0_data_proc_id = self._launch_transform('L0', *list_args_L0)

        L1_data_proc_id = self._launch_transform('L1', *list_args_L1)

        L2_density_data_proc_id = self._launch_transform('L2_density', *list_args_L2_density)

        L2_salinity_data_proc_id = self._launch_transform('L2_salinity', *list_args_L2_salinity)

        log.debug("Launched the transforms: L0 = %s, L1 = %s", L0_data_proc_id, L1_data_proc_id)

        #-------------------------------------------------------------------------
        # Start a subscriber listening to the output of each of the transforms
        #-------------------------------------------------------------------------

        ar_L0 = self.start_subscriber_listening_to_L0_transform(out_data_prod_id=list_args_L0[2])
        ar_L1 = self.start_subscriber_listening_to_L1_transform(out_data_prod_id=list_args_L1[2])

        ar_L2_density = self.start_subscriber_listening_to_L2_density_transform(out_data_prod_id=list_args_L2_density[2])
        ar_L2_salinity = self.start_subscriber_listening_to_L2_salinity_transform(out_data_prod_id=list_args_L2_salinity[2])


        #-------------------------------------------------------------------
        # Publish the parsed packets that the L0 transform is listening for
        #-------------------------------------------------------------------

        stream_id, stream_route = self.get_stream_and_route_for_data_prod(data_prod_id= list_args_L0[1])
        self._publish_for_L0_transform(stream_id, stream_route)

        #-------------------------------------------------------------------
        # Check the granules being outputted by the transforms
        #-------------------------------------------------------------------
        self._check_granule_from_L0_transform(ar_L0)
        self._check_granule_from_L1_transform(ar_L1)
        self._check_granule_from_L2_density_transform(ar_L2_density)
        self._check_granule_from_L2_salinity_transform(ar_L2_salinity)


    def _prepare_stream_def_for_transform_chain(self):

        # Get the stream definition for the stream using the parameter dictionary
#        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(parameter_dict_name, id_only=True)
        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'input_param_for_L0')
        self.in_stream_def_id_for_L0 = self.pubsub.create_stream_definition(name='stream_def_for_L0', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, self.in_stream_def_id_for_L0)

        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'params_for_other_transforms')
        self.stream_def_id = self.pubsub.create_stream_definition(name='stream_def_for_CTDBP_transforms', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, self.stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", pdict_id)
        log.debug("Got the stream def for parsed input to L0: %s", self.in_stream_def_id_for_L0)
        log.debug("Got the stream def for other other streams: %s", self.stream_def_id)

    def _prepare_things_you_need_to_launch_transform(self, name_of_transform = ''):

        module, class_name = self._get_class_module(name_of_transform)

        #-------------------------------------------------------------------------
        # Data Process Definition
        #-------------------------------------------------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name= 'CTDBP_%s_Transform' % name_of_transform,
            description= 'Data Process Definition for the CTDBP %s transform.' % name_of_transform,
            module= module,
            class_name=class_name)

        data_proc_def_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, data_proc_def_id)
        log.debug("created data process definition: id = %s", data_proc_def_id)

        #-------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects for the data product objects
        #-------------------------------------------------------------------------

        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        #-------------------------------------------------------------------------
        # Get the names of the input and output data products
        #-------------------------------------------------------------------------

        input_dpod_id = ''
        output_dpod_id = ''

        if name_of_transform == 'L0':

            input_dpod_id = self._create_input_data_product('parsed', tdom, sdom)
            output_dpod_id = self._create_output_data_product('L0', tdom, sdom)

            self.in_prod_for_L1 = output_dpod_id

        elif name_of_transform == 'L1':

            input_dpod_id = self.in_prod_for_L1
            output_dpod_id = self._create_output_data_product('L1', tdom, sdom)

            self.in_prod_for_L2 = output_dpod_id

        elif name_of_transform == 'L2_density':

            input_dpod_id = self.in_prod_for_L2
            output_dpod_id = self._create_output_data_product('L2_density', tdom, sdom)

        elif name_of_transform == 'L2_salinity':

            input_dpod_id = self.in_prod_for_L2
            output_dpod_id = self._create_output_data_product('L2_salinity', tdom, sdom)

        else:
            self.fail("Unknown transform name: %s" % name_of_transform)

        return [data_proc_def_id, input_dpod_id, output_dpod_id]

    def _get_class_module(self, name_of_transform):

        options = {'L0' : self._class_module_L0,
                   'L1' : self._class_module_L1,
                   'L2_density' : self._class_module_L2_density,
                   'L2_salinity' : self._class_module_L2_salinity}

        return options[name_of_transform]()

    def _class_module_L0(self):
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L0'
        class_name = 'CTDBP_L0_all'
        return module, class_name

    def _class_module_L1(self):

        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L1'
        class_name = 'CTDBP_L1_Transform'
        return module, class_name

    def _class_module_L2_density(self):

        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L2_density'
        class_name = 'CTDBP_DensityTransform'

        return module, class_name

    def _class_module_L2_salinity(self):

        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L2_salinity'
        class_name = 'CTDBP_SalinityTransform'

        return module, class_name

    def _create_input_data_product(self, name_of_transform = '', tdom = None, sdom = None):

        dpod_obj = IonObject(RT.DataProduct,
            name='dprod_%s' % name_of_transform,
            description='for_%s' % name_of_transform,
            temporal_domain = tdom,
            spatial_domain = sdom)

        log.debug("the stream def id: %s", self.stream_def_id)

        if name_of_transform == 'L0':
            stream_def_id = self.in_stream_def_id_for_L0
        else:
            stream_def_id = self.stream_def_id


        dpod_id = self.dataproduct_management.create_data_product(data_product=dpod_obj,
            stream_definition_id= stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, dpod_id)

        log.debug("got the data product out. id: %s", dpod_id)

        return dpod_id

    def _create_output_data_product(self, name_of_transform = '', tdom = None, sdom = None):

        dpod_obj = IonObject(RT.DataProduct,
            name='dprod_%s' % name_of_transform,
            description='for_%s' % name_of_transform,
            temporal_domain = tdom,
            spatial_domain = sdom)

        if name_of_transform == 'L0':
            stream_def_id = self.in_stream_def_id_for_L0
        else:
            stream_def_id = self.stream_def_id

        dpod_id = self.dataproduct_management.create_data_product(data_product=dpod_obj,
            stream_definition_id=stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, dpod_id)

        return dpod_id

    def _launch_transform(self, name_of_transform = '', data_proc_def_id = None, input_dpod_id = None, output_dpod_id = None):

        # The binding is the key under which the output stream appears in the launched
        # process's config, i.e. config.process.publish_streams.<binding>
        # (e.g. 'L0_stream', 'L1_stream', 'density' or 'salinity')

        if name_of_transform in ['L0', 'L1']:
            binding = '%s_stream' % name_of_transform
        elif name_of_transform == 'L2_salinity':
            binding = 'salinity'
        elif name_of_transform == 'L2_density':
            binding = 'density'

        config = None
        if name_of_transform == 'L1':
            config = self._create_calibration_coefficients_dict()
        elif name_of_transform == 'L2_density':
            config = DotDict()
            config.process = {'lat' : 32.7153, 'lon' : 117.1564}
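            # The density transform presumably needs a fixed location because the GSW
            # absolute-salinity step (SA_from_SP) takes longitude and latitude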

        log.debug("launching transform for name: %s",name_of_transform )
        log.debug("launching transform for data_proc_def_id: %s\ninput_dpod_id: %s\noutput_dpod_id: %s", data_proc_def_id, input_dpod_id, output_dpod_id )

        data_proc_id = self.data_process_management.create_data_process(
                                                        data_process_definition_id = data_proc_def_id,
                                                        in_data_product_ids= [input_dpod_id],
                                                        out_data_product_ids = [output_dpod_id],
                                                        configuration = config)

        self.addCleanup(self.data_process_management.delete_data_process, data_proc_id)

        self.data_process_management.activate_data_process(data_proc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, data_proc_id)

        log.debug("Created a data process for ctdbp %s transform: id = %s", name_of_transform, data_proc_id)

        return data_proc_id

    def get_stream_and_route_for_data_prod(self, data_prod_id = ''):

        stream_ids, _ = self.resource_registry.find_objects(data_prod_id, PRED.hasStream, RT.Stream, True)
        stream_id = stream_ids[0]
        input_stream = self.resource_registry.read(stream_id)
        stream_route = input_stream.stream_route

        return stream_id, stream_route

    def start_subscriber_listening_to_L0_transform(self, out_data_prod_id = ''):

        #----------- Create a subscriber to listen to this transform's output --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        ar_L0 = self._start_subscriber_to_transform( name_of_transform = 'L0',stream_id=output_stream_id_of_transform)

        return ar_L0


    def start_subscriber_listening_to_L1_transform(self, out_data_prod_id = ''):

        #----------- Create a subscriber to listen to this transform's output --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        ar_L1 = self._start_subscriber_to_transform( name_of_transform = 'L1',stream_id=output_stream_id_of_transform)

        return ar_L1


    def start_subscriber_listening_to_L2_density_transform(self, out_data_prod_id = ''):

        #----------- Create a subscriber to listen to this transform's output --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        ar_L2_density = self._start_subscriber_to_transform( name_of_transform = 'L2_density', stream_id=output_stream_id_of_transform)

        return ar_L2_density

    def start_subscriber_listening_to_L2_salinity_transform(self, out_data_prod_id = ''):

        #----------- Create a subscriber to listen to this transform's output --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        ar_L2_density = self._start_subscriber_to_transform( name_of_transform = 'L2_salinity',stream_id=output_stream_id_of_transform)

        return ar_L2_density

    def _start_subscriber_to_transform(self,  name_of_transform = '', stream_id = ''):

        ar = gevent.event.AsyncResult()
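        # The callback stores the first message it receives in the AsyncResult so the
        # test can block on ar.get(timeout=...) until a granule arrives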
        def subscriber(m,r,s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub_%s' % name_of_transform, callback=subscriber)

        # Note: running the line below creates the exchange, since none with that name exists beforehand
        sub_id = self.pubsub.create_subscription('subscriber_to_transform_%s' % name_of_transform,
            stream_ids=[stream_id],
            exchange_name='sub_%s' % name_of_transform)
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        return ar

    def _check_granule_from_L0_transform(self, ar = None):

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L0 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L0_algorithm(granule_from_transform)


    def _check_granule_from_L1_transform(self, ar = None):

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L1 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L1_algorithm(granule_from_transform)

    def _check_granule_from_L2_density_transform(self, ar = None):

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L2 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L2_density_algorithm(granule_from_transform)

    def _check_granule_from_L2_salinity_transform(self, ar = None):

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L2 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L2_salinity_algorithm(granule_from_transform)


    def _check_application_of_L0_algorithm(self, granule = None):
        """ Check the algorithm applied by the L0 transform """

        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'pressure', 'conductivity', 'temperature']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L1_algorithm(self, granule = None):
        """ Check the algorithm applied by the L1 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = [ 'time', 'pressure', 'conductivity', 'temp']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L2_density_algorithm(self, granule = None):
        """ Check the algorithm applied by the L2 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'density']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L2_salinity_algorithm(self, granule = None):
        """ Check the algorithm applied by the L2 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'salinity']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _publish_for_L0_transform(self, input_stream_id = None, stream_route = None):

        #----------- Publish on that stream so that the transform can receive it --------------------------------
        self._publish_to_transform(input_stream_id, stream_route )

    def _publish_to_transform(self, stream_id = '', stream_route = None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=self.in_stream_def_id_for_L0, length = 5)
        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)


    def _create_input_param_dict_for_test(self, parameter_dict_name = ''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = 'Siemens_per_meter'
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = 'Pascal'
        pdict.add_context(pres_ctxt)

        if parameter_dict_name == 'input_param_for_L0':
            temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        else:
            temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=numpy.dtype('float32')))

        temp_ctxt.uom = 'degree_kelvin'
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = 'kg m-3'
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = 'PSU'
        pdict.add_context(sal_ctxt)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            if parameter_dict_name == 'input_param_for_L0':
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)
            elif pc[1].name == 'temp':
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id



    def _create_calibration_coefficients_dict(self):
        config = DotDict()
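        # These look like Sea-Bird (SBE 37) style calibration coefficients; the L1
        # transform is expected to read them from config.process.calibration_coeffs
        # (this dict is passed as the process configuration in _launch_transform)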
        config.process.calibration_coeffs = {
                  'temp_calibration_coeffs': {
                      'TA0' : 1.561342e-03,
                      'TA1' : 2.561486e-04,
                      'TA2' : 1.896537e-07,
                      'TA3' : 1.301189e-07,
                      'TOFFSET' : 0.000000e+00
                  },

                  'cond_calibration_coeffs':  {
                      'G' : -9.896568e-01,
                      'H' : 1.316599e-01,
                      'I' : -2.213854e-04,
                      'J' : 3.292199e-05,
                      'CPCOR' : -9.570000e-08,
                      'CTCOR' : 3.250000e-06,
                      'CSLOPE' : 1.000000e+00
                  },

                  'pres_calibration_coeffs' : {
                      'PA0' : 4.960417e-02,
                      'PA1' : 4.883682e-04,
                      'PA2' : -5.687309e-12,
                      'PTCA0' : 5.249802e+05,
                      'PTCA1' : 7.595719e+00,
                      'PTCA2' : -1.322776e-01,
                      'PTCB0' : 2.503125e+01,
                      'PTCB1' : 5.000000e-05,
                      'PTCB2' : 0.000000e+00,
                      'PTEMPA0' : -6.431504e+01,
                      'PTEMPA1' : 5.168177e+01,
                      'PTEMPA2' : -2.847757e-01,
                      'POFFSET' : 0.000000e+00
                  }

              }


        return config
Code Example #26
class DatasetManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.resource_registry  = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

    def test_dataset_crud(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        tdom, sdom = time_series_domain()
        dataset_id = self.dataset_management.create_dataset(name='ctd_dataset', parameter_dictionary_id=pdict_id, spatial_domain=sdom.dump(), temporal_domain=tdom.dump())

        ds_obj = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, 'ctd_dataset')
        
        ds_obj.name = 'something different'
        self.dataset_management.update_dataset(ds_obj)
        self.dataset_management.register_dataset(dataset_id)
        ds_obj2 = self.dataset_management.read_dataset(dataset_id)
        self.assertEquals(ds_obj.name, ds_obj2.name)
        self.assertTrue(ds_obj2.registered)
    
    def test_context_crud(self):
        context_ids = self.create_contexts()
        context_id = context_ids.pop()

        context = DatasetManagementService.get_parameter_context(context_id)
        self.assertIsInstance(context, ParameterContext)
        self.assertEquals(context.identifier, context_id)

        self.dataset_management.delete_parameter_context(context_id)

        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_context(context_id)



    def test_pfunc_crud(self):
        contexts, funcs = self.create_pfuncs()
        context_ids = [context_id for ctxt,context_id in contexts.itervalues()]

        pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        expr, expr_id = funcs['CONDWAT_L1']
        func_class = DatasetManagementService.get_parameter_function(expr_id)
        self.assertIsInstance(func_class, NumexprFunction)

    def test_pdict_crud(self):
        context_ids = self.create_contexts()
        pdict_res_id = self.dataset_management.create_parameter_dictionary(name='pdict1', parameter_context_ids=context_ids, temporal_context='time')

        pdict_contexts = self.dataset_management.read_parameter_contexts(parameter_dictionary_id=pdict_res_id, id_only=True)

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_res_id)
        self.assertIsInstance(pdict, ParameterDictionary)
        self.assertTrue('time_test' in pdict)
        self.assertEquals(pdict.identifier, pdict_res_id)

        self.assertEquals(set(pdict_contexts), set(context_ids))

        self.dataset_management.delete_parameter_dictionary(parameter_dictionary_id=pdict_res_id)
        with self.assertRaises(NotFound):
            self.dataset_management.read_parameter_dictionary(parameter_dictionary_id=pdict_res_id)

    def create_contexts(self):
        context_ids = []
        cond_ctxt = ParameterContext('conductivity_test', param_type=QuantityType(value_encoding=np.float32))
        cond_ctxt.uom = 'unknown'
        cond_ctxt.fill_value = 0e0
        context_ids.append(self.dataset_management.create_parameter_context(name='conductivity_test', parameter_context=cond_ctxt.dump()))

        pres_ctxt = ParameterContext('pressure_test', param_type=QuantityType(value_encoding=np.float32))
        pres_ctxt.uom = 'Pascal'
        pres_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(name='pressure_test', parameter_context=pres_ctxt.dump()))

        sal_ctxt = ParameterContext('salinity_test', param_type=QuantityType(value_encoding=np.float32))
        sal_ctxt.uom = 'PSU'
        sal_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(name='salinity_test', parameter_context=sal_ctxt.dump()))

        temp_ctxt = ParameterContext('temp_test', param_type=QuantityType(value_encoding=np.float32))
        temp_ctxt.uom = 'degree_Celsius'
        temp_ctxt.fill_value = 0e0
        context_ids.append(self.dataset_management.create_parameter_context(name='temp_test', parameter_context=temp_ctxt.dump()))

        t_ctxt = ParameterContext('time_test', param_type=QuantityType(value_encoding=np.int64))
        t_ctxt.uom = 'seconds since 1970-01-01'
        t_ctxt.fill_value = 0x0
        context_ids.append(self.dataset_management.create_parameter_context(name='time_test', parameter_context=t_ctxt.dump()))

        return context_ids

    def create_pfuncs(self):
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(name='test_TIME', parameter_context=t_ctxt.dump())
        contexts['TIME'] = (t_ctxt, t_ctxt_id)

        lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(name='test_LAT', parameter_context=lat_ctxt.dump())
        contexts['LAT'] = lat_ctxt, lat_ctxt_id

        lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(name='test_LON', parameter_context=lon_ctxt.dump())
        contexts['LON'] = lon_ctxt, lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(name='test_TEMPWAT_L0', parameter_context=temp_ctxt.dump())
        contexts['TEMPWAT_L0'] = temp_ctxt, temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(name='test_CONDWAT_L0', parameter_context=cond_ctxt.dump())
        contexts['CONDWAT_L0'] = cond_ctxt, cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(name='test_PRESWAT_L0', parameter_context=press_ctxt.dump())
        contexts['PRESWAT_L0'] = press_ctxt, press_ctxt_id


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'])
        expr_id = self.dataset_management.create_parameter_function(name='test_TEMPWAT_L1', parameter_function=expr.dump())
        funcs['TEMPWAT_L1'] = expr, expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr.param_map = tl1_pmap
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(name='test_TEMPWAT_L1', parameter_context=tempL1_ctxt.dump(), parameter_function_id=expr_id)
        contexts['TEMPWAT_L1'] = tempL1_ctxt, tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'])
        expr_id = self.dataset_management.create_parameter_function(name='test_CONDWAT_L1', parameter_function=expr.dump())
        funcs['CONDWAT_L1'] = expr, expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr.param_map = cl1_pmap
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(name='test_CONDWAT_L1', parameter_context=condL1_ctxt.dump(), parameter_function_id=expr_id)
        contexts['CONDWAT_L1'] = condL1_ctxt, condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'])
        expr_id = self.dataset_management.create_parameter_function(name='test_PRESWAT_L1', parameter_function=expr.dump())
        funcs['PRESWAT_L1'] = expr, expr_id
        
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr.param_map = pl1_pmap
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(name='test_PRESWAT_L1', parameter_context=presL1_ctxt.dump(), parameter_function_id=expr_id)
        contexts['PRESWAT_L1'] = presL1_ctxt, presL1_ctxt_id

        # Density & practical salinity calculated using the Gibbs Seawater library - available via the python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(name='test_PRACSAL', parameter_function=expr.dump())
        funcs['PRACSAL'] = expr, expr_id
        
        # The conductivity argument is scaled inline (C * 10, S/m to mS/cm) via a nested NumexprFunction in the parameter map below.
        sal_pmap = {'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        expr.param_map = sal_pmap
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(name='test_PRACSAL', parameter_context=sal_ctxt.dump(), parameter_function_id=expr_id)
        contexts['PRACSAL'] = sal_ctxt, sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name='test_DENSITY', parameter_context=dens_ctxt.dump())
        contexts['DENSITY'] = dens_ctxt, dens_ctxt_id
        return contexts, funcs

    def test_verify_contexts(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)
        pcontexts = self.dataset_management.read_parameter_contexts(parameter_dictionary_id=pdict_id)
        for pcontext in pcontexts:
            self.assertTrue('fill_value' in pcontext)
            self.assertTrue('reference_urls' in pcontext)
            self.assertTrue('internal_name' in pcontext)
            self.assertTrue('display_name' in pcontext)
            self.assertTrue('standard_name' in pcontext)
            self.assertTrue('ooi_short_name' in pcontext)
            self.assertTrue('description' in pcontext)
            self.assertTrue('precision' in pcontext)
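
# --- Illustrative sketch (not part of the original test) ---------------------
# The L0 -> L1 conversions and the gsw call chain documented in the comments of
# create_pfuncs() above can be evaluated directly with numpy and gsw. The
# equations and the fixed p_range value (679.34040721) come from the test code;
# the sample raw values below are made up, and the gsw usage assumes the
# python-gsw package referenced in the comments is installed.
import numpy as np
import gsw

def tempwat_l1(tempwat_l0):
    # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
    return (tempwat_l0 / 10000.0) - 10.0

def condwat_l1(condwat_l0):
    # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
    return (condwat_l0 / 100000.0) - 0.5

def preswat_l1(preswat_l0, p_range=679.34040721):
    # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
    return (preswat_l0 * p_range / (0.85 * 65536.0)) - (0.05 * p_range)

def pracsal_and_density(condwat_l0, tempwat_l0, preswat_l0, lon, lat):
    # Follows the call chain documented in the comments above.
    c1 = condwat_l1(condwat_l0)
    t1 = tempwat_l1(tempwat_l0)
    p1 = preswat_l1(preswat_l0)
    pracsal = gsw.SP_from_C(c1 * 10.0, t1, p1)   # scale conductivity before SP_from_C
    abs_sal = gsw.SA_from_SP(pracsal, p1, lon, lat)
    cons_temp = gsw.CT_from_t(abs_sal, t1, p1)
    density = gsw.rho(abs_sal, cons_temp, p1)
    return pracsal, density

# Example call with made-up raw counts:
pracsal, density = pracsal_and_density(
    condwat_l0=np.array([4341400.0]), tempwat_l0=np.array([300000.0]),
    preswat_l0=np.array([256.8]), lon=np.array([-71.0]), lat=np.array([45.0]))
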
Code example #27
0
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self): # Love the non pep-8 convention
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher   = ProcessDispatcherServiceClient()
        self.pubsub_management    = PubsubManagementServiceClient()
        self.resource_registry    = ResourceRegistryServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever       = DataRetrieverServiceClient()
        self.event                = Event()
        self.exchange_space_name  = 'test_granules'
        self.exchange_point_name  = 'science_data'       
        self.i                    = 0
        self.cci                  = 0

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------
        
    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset
        '''
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        dataset = Dataset('test_dataset_%i'%self.i)
        dataset_id = self.dataset_management.create_dataset(dataset, parameter_dictionary_id=parameter_dict_id)
        self.addCleanup(self.dataset_management.delete_dataset, dataset_id)
        return dataset_id
    
    def get_datastore(self, dataset_id):
        '''
        Gets an instance of the datastore
            This method is primarily used to work around a bug where integration tests running in multiple containers
            may delete a CouchDB datastore, leaving the other containers unaware of the datastore's new state.
        '''
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore
    
    def get_ingestion_config(self):
        '''
        Grab the ingestion configuration from the resource registry
        '''
        # The ingestion configuration should have been created by the bootstrap service 
        # which is configured through r2deploy.yml

        ingest_configs, _  = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=''):
        '''
        Launch the producer
        '''
        pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}})
        self.addCleanup(self.container.terminate_process, pid)

    def make_simple_dataset(self):
        '''
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests
        '''
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data %i' % self.i, parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route     = self.pubsub_management.create_stream('ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        dataset_id = self.create_dataset(pdict_id)

        # self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self,stream_id,stream_route,offset=0):
        '''
        Publish deterministic data
        '''

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())

    def publish_fake_data(self,stream_id, route):
        '''
        Make four granules
        '''
        for i in xrange(4):
            self.publish_hifi(stream_id,route,i)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Starts ingestion/persistence for a given dataset
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
    
    def stop_ingestion(self, stream_id):
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id)

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format
        '''
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt     = RecordDictionaryTool.load_from_granule(granule)
                if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)


    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        
        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)


        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream 
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - Call persist_data_stream to set up the subscription for the ingestion workers
        #   on the specified stream, which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        
        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        
        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id =  self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

    
        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        sub_id = self.pubsub_management.create_subscription(self.exchange_space_name,stream_ids=[replay_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()
        
        self.assertTrue(self.event.wait(10))

        self.data_retriever.cancel_replay_agent(self.replay_id)


        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b,bool) else b)


    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        
        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


    def test_ingestion_pause(self):
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        ingestion_config_id = self.get_ingestion_config()
        self.start_ingestion(ctd_stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, ctd_stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(monitor.wait())
        granule = self.data_retriever.retrieve(dataset_id)


        self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

        monitor.event.clear()
        rdt['time'] = np.arange(10,20)
        publisher.publish(rdt.to_granule())
        self.assertFalse(monitor.event.wait(1))

        self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)

        self.assertTrue(monitor.wait())

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))

    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        self.publish_hifi(stream_id,route, 0)
        self.publish_hifi(stream_id,route, 1)
        

        self.wait_until_we_have_enough_granules(dataset_id,20) # I just need two


        success = False
        def verifier():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verifier)

        self.assertTrue(success)

        success = False
        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verify_points)

        self.assertTrue(success)

    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        
        stream_id, route  = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id  = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.wait())

        query = {
            'start_time': 0 - 2208988800,
            'end_time':   19 - 2208988800,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        np.testing.assert_array_equal(rdt['time'], np.arange(0,20,2))
        self.assertEquals(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)


    @unittest.skip('deprecated')
    def test_correct_time(self):

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        #  the offset between Unix and NTP timestamps
        unix_now = np.floor(time.time())
        ntp_now  = unix_now + 2208988800 

        unix_ago = unix_now - 20
        ntp_ago  = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago,ntp_now))
        
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue( np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue( np.abs(temporal_bounds[1] - unix_now) < 2)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id,40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    def publish_and_wait(self, dataset_id, granule):
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream,id_only=True)
        stream_id=stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id,route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.wait())


    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id,dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        # Publish the initial granule
        # The first one carries the sparse values, setting lat to 45 and lon to -71
        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = ['']
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # Check the values and make sure they're correct
        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
        np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


        # We're going to change the lat/lon
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = time.time() + 2208988800
        rdt['lat'] = [46]
        rdt['lon'] = [-73]
        
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()


        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        
        for i in xrange(9):
            ntp_now = time.time() + 2208988800
            rdt['time'] = [ntp_now]
            rdt['internal_timestamp'] = [ntp_now]
            rdt['temp'] = [300000]
            rdt['preferred_timestamp'] = ['driver_timestamp']
            rdt['port_timestamp'] = [ntp_now]
            rdt['quality_flag'] = [None]
            rdt['conductivity'] = [4341400]
            rdt['driver_timestamp'] = [ntp_now]
            rdt['pressure'] = [256.8]

            publisher.publish(rdt.to_granule())
            self.assertTrue(dataset_monitor.wait())
            dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
        np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
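
# --- Illustrative sketch (not part of the original tests) --------------------
# The constant 2208988800 used throughout TestDMEnd2End is the number of
# seconds between the NTP epoch (1900-01-01) and the Unix epoch (1970-01-01),
# so converting between the two timestamp conventions is a fixed offset.
import time

NTP_UNIX_OFFSET = 2208988800  # seconds from 1900-01-01 to 1970-01-01

def unix_to_ntp(unix_ts):
    return unix_ts + NTP_UNIX_OFFSET

def ntp_to_unix(ntp_ts):
    return ntp_ts - NTP_UNIX_OFFSET

# e.g. the 'time' values published in test_sparse_values:
ntp_now = unix_to_ntp(time.time())
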
Code example #28
0
class CtdTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _create_input_param_dict_for_test(self, parameter_dict_name=""):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext("time", param_type=QuantityType(value_encoding=numpy.dtype("float64")))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = "seconds since 01-01-1900"
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext("conductivity", param_type=QuantityType(value_encoding=numpy.dtype("float32")))
        cond_ctxt.uom = ""
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext("pressure", param_type=QuantityType(value_encoding=numpy.dtype("float32")))
        pres_ctxt.uom = ""
        pdict.add_context(pres_ctxt)

        if parameter_dict_name == "input_param_dict":
            temp_ctxt = ParameterContext("temperature", param_type=QuantityType(value_encoding=numpy.dtype("float32")))
        else:
            temp_ctxt = ParameterContext("temp", param_type=QuantityType(value_encoding=numpy.dtype("float32")))

        temp_ctxt.uom = ""
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext("density", param_type=QuantityType(value_encoding=numpy.dtype("float32")))
        dens_ctxt.uom = ""
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext("salinity", param_type=QuantityType(value_encoding=numpy.dtype("float32")))
        sal_ctxt.uom = ""
        pdict.add_context(sal_ctxt)

        # create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            if parameter_dict_name == "input_param_dict":
                self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)
            elif parameter_dict_name == "output_param_dict" and pc[1].name == "temp":
                self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt["time"] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()
        self.i += length

        return g

    def _create_calibration_coefficients_dict(self):
        config = DotDict()
        config.process.calibration_coeffs = {
            "temp_calibration_coeffs": {
                "TA0": 1.561342e-03,
                "TA1": 2.561486e-04,
                "TA2": 1.896537e-07,
                "TA3": 1.301189e-07,
                "TOFFSET": 0.000000e00,
            },
            "cond_calibration_coeffs": {
                "G": -9.896568e-01,
                "H": 1.316599e-01,
                "I": -2.213854e-04,
                "J": 3.292199e-05,
                "CPCOR": -9.570000e-08,
                "CTCOR": 3.250000e-06,
                "CSLOPE": 1.000000e00,
            },
            "pres_calibration_coeffs": {
                "PA0": 4.960417e-02,
                "PA1": 4.883682e-04,
                "PA2": -5.687309e-12,
                "PTCA0": 5.249802e05,
                "PTCA1": 7.595719e00,
                "PTCA2": -1.322776e-01,
                "PTCB0": 2.503125e01,
                "PTCB1": 5.000000e-05,
                "PTCB2": 0.000000e00,
                "PTEMPA0": -6.431504e01,
                "PTEMPA1": 5.168177e01,
                "PTEMPA2": -2.847757e-01,
                "POFFSET": 0.000000e00,
            },
        }

        return config
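
    # Note (illustrative assumption, not exercised by this test): a transform
    # spawned with this configuration would typically read the coefficients back
    # from its process config, e.g. something along the lines of
    #   coeffs = self.CFG.process.calibration_coeffs['temp_calibration_coeffs']
    # DotDict supports both attribute and key access, which is what makes the
    # nested config.process.calibration_coeffs assignment above work.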

    def clean_queues(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()

    def cleaning_operations(self):
        for dproc_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(dproc_id)

    def test_ctd_L1_all(self):
        """
        Test that packets are processed by the ctd_L1_all transform
        """

        # ----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name="CTDBP_L1_Transform",
            description="Takes granules on the L0 stream containing C, T and P data, applies the L1 algorithms separately, and publishes on the L1 stream.",
            module="ion.processes.data.transforms.ctdbp.ctdbp_L1",
            class_name="CTDBP_L1_Transform",
        )

        dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        # ----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        # Get the stream definition for the stream using the parameter dictionary
        L0_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name="input_param_dict")

        L0_stream_def_id = self.pubsub.create_stream_definition(name="parsed", parameter_dictionary_id=L0_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, L0_stream_def_id)

        L1_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name="output_param_dict")
        L1_stream_def_id = self.pubsub.create_stream_definition(name="L1_out", parameter_dictionary_id=L1_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, L1_stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", L0_pdict_id)
        log.debug("Got the stream def for parsed input: %s", L0_stream_def_id)

        log.debug("got the stream def for the output: %s", L1_stream_def_id)

        # Input data product
        L0_stream_dp_obj = IonObject(
            RT.DataProduct,
            name="L0_stream",
            description="L0 stream input to CTBP L1 transform",
            temporal_domain=tdom,
            spatial_domain=sdom,
        )

        input_dp_id = self.dataproduct_management.create_data_product(
            data_product=L0_stream_dp_obj, stream_definition_id=L0_stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product
        L1_stream_dp_obj = IonObject(
            RT.DataProduct,
            name="L1_stream",
            description="L1_stream output of CTBP L1 transform",
            temporal_domain=tdom,
            spatial_domain=sdom,
        )

        L1_stream_dp_id = self.dataproduct_management.create_data_product(
            data_product=L1_stream_dp_obj, stream_definition_id=L1_stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, L1_stream_dp_id)

        # The data product name must be "L1_stream": when the data process is launched,
        # that name becomes the key under config.process.publish_streams that the transform publishes on
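        # Illustration (an assumption, not verified by this test): the launch config
        # for the transform would then carry a mapping along the lines of
        #   config.process.publish_streams = {'L1_stream': <output stream id>}
        # so the transform's publisher attribute name lines up with the data
        # product name chosen above.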
        out_stream_ids, _ = self.resource_registry.find_objects(L1_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        config = self._create_calibration_coefficients_dict()
        dproc_id = self.data_process_management.create_data_process(
            data_process_definition_id=dprocdef_id,
            in_data_product_ids=[input_dp_id],
            out_data_product_ids=[L1_stream_dp_id],
            configuration=config,
        )

        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)
        log.debug("Created a data process for ctdbp_L1. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        # ----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)

        input_stream_id = stream_ids[0]
        input_stream = self.resource_registry.read(input_stream_id)
        stream_route = input_stream.stream_route

        log.debug("The input stream for the L1 transform: %s", input_stream_id)

        # ----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name="sub", callback=subscriber)

        sub_id = self.pubsub.create_subscription(
            "subscriber_to_transform", stream_ids=[output_stream_id], exchange_name="sub"
        )
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        # ----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=L0_stream_def_id, length=5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L1 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)

    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertIn("pressure", rdt)
        self.assertIn("temp", rdt)
        self.assertIn("conductivity", rdt)
        self.assertIn("time", rdt)