def build_param_contexts(self):
    context_ids = []

    t_ctxt = ParameterContext(
        'time',
        param_type=QuantityType(value_encoding=np.dtype('int64')))
    t_ctxt.uom = 'seconds since 1970-01-01'
    context_ids.append(
        self.dataset_management.create_parameter_context(
            name='time', parameter_context=t_ctxt.dump()))

    cnd_ctxt = ParameterContext('conductivity', param_type=ArrayType())
    cnd_ctxt.uom = 'mmho/cm'
    context_ids.append(
        self.dataset_management.create_parameter_context(
            name='conductivity', parameter_context=cnd_ctxt.dump()))

    temp_ctxt = ParameterContext('temperature', param_type=ArrayType())
    temp_ctxt.uom = 'degC'
    context_ids.append(
        self.dataset_management.create_parameter_context(
            name='temperature', parameter_context=temp_ctxt.dump()))

    press_ctxt = ParameterContext('pressure', param_type=ArrayType())
    press_ctxt.uom = 'decibars'
    context_ids.append(
        self.dataset_management.create_parameter_context(
            name='pressure', parameter_context=press_ctxt.dump()))

    oxy_ctxt = ParameterContext('oxygen', param_type=ArrayType())
    oxy_ctxt.uom = 'Hz'
    context_ids.append(
        self.dataset_management.create_parameter_context(
            name='oxygen', parameter_context=oxy_ctxt.dump()))

    return context_ids
def param_type(self, s):
    # Map an encoding string to the corresponding parameter type.
    if s == 'record':
        return RecordType()
    elif s == 'array':
        return ArrayType()
    else:
        return QuantityType(value_encoding=np.dtype(s))
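# A minimal usage sketch of the dispatch above (illustrative values only;
# the numeric dtype strings are assumptions about what callers pass in):
#
#   self.param_type('record')   # -> RecordType()
#   self.param_type('array')    # -> ArrayType()
#   self.param_type('float32')  # -> QuantityType(value_encoding=np.dtype('float32'))
#
# Any string that np.dtype() does not recognize raises a TypeError.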
def get_array_type(self, parameter_type=None, encoding=None):
    # String-like encodings are handled by the string type instead.
    if encoding in ('string', 'char', 'str', '', 'opaque'):
        return self.get_string_type()
    # Guard against encoding=None so we raise the explicit TypeError below
    # rather than "argument of type 'NoneType' is not iterable".
    if encoding and 'int' in encoding:
        fill = -9999
    elif encoding and 'float' in encoding:
        fill = np.nan
    else:
        raise TypeError("Unknown encoding for array types: %s" % encoding)
    return ArrayType(inner_encoding=encoding, inner_fill_value=fill)
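# Expected behavior under these rules (illustrative, not exhaustive):
#
#   self.get_array_type(encoding='int32')    # -> ArrayType(inner_encoding='int32', inner_fill_value=-9999)
#   self.get_array_type(encoding='float64')  # -> ArrayType(inner_encoding='float64', inner_fill_value=np.nan)
#   self.get_array_type(encoding='opaque')   # -> delegated to self.get_string_type()
#   self.get_array_type()                    # -> TypeError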
def test_replay_with_parameters(self):
    #--------------------------------------------------------------------------------
    # Create the configurations and the dataset
    #--------------------------------------------------------------------------------
    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext('binary', param_type=ArrayType())
    context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext('records', param_type=RecordType())
    context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

    stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)

    stream_id, route = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    self.publish_fake_data(stream_id, route)
    self.assertTrue(dataset_monitor.wait())

    # 2208988800 seconds is the offset between the 1900-01-01 and
    # 1970-01-01 epochs.
    query = {
        'start_time': 0 - 2208988800,
        'end_time': 19 - 2208988800,
        'stride_time': 2,
        'parameters': ['time', 'temp']
    }
    retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

    rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
    np.testing.assert_array_equal(rdt['time'], np.arange(0, 20, 2))
    self.assertEqual(set(rdt.iterkeys()), set(['time', 'temp']))

    extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time', 'temp'])
    self.assertTrue(extents['time'] >= 20)
    self.assertTrue(extents['temp'] >= 20)
def _create_param_contexts(self):
    context_ids = []

    t_ctxt = ParameterContext(
        'ingestion_timestamp',
        param_type=QuantityType(value_encoding=numpy.dtype('float64')))
    t_ctxt.uom = 'seconds since 1900-01-01'
    t_ctxt.fill_value = -9999
    t_ctxt_id = self.dataset_management_client.create_parameter_context(
        name='ingestion_timestamp', parameter_context=t_ctxt.dump())
    context_ids.append(t_ctxt_id)

    raw_ctxt = ParameterContext('raw', param_type=ArrayType())
    raw_ctxt.uom = ''
    context_ids.append(
        self.dataset_management_client.create_parameter_context(
            name='raw', parameter_context=raw_ctxt.dump()))

    return context_ids, t_ctxt_id
def test_dm_end_2_end(self):
    #--------------------------------------------------------------------------------
    # Set up a stream and have a mock instrument (producer) send data
    #--------------------------------------------------------------------------------
    self.event.clear()

    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext('binary', param_type=ArrayType())
    context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext('records', param_type=RecordType())
    context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

    stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)

    stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)

    #--------------------------------------------------------------------------------
    # Start persisting the data on the stream
    # - Get the ingestion configuration from the resource registry
    # - Create the dataset
    # - Call persist_data_stream to set up the subscription for the ingestion
    #   workers on the stream you specify, which causes the data to be persisted
    #--------------------------------------------------------------------------------

    ingest_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)

    #--------------------------------------------------------------------------------
    # Now the granules are being ingested and persisted
    #--------------------------------------------------------------------------------

    self.launch_producer(stream_id)
    self.wait_until_we_have_enough_granules(dataset_id, 40)

    #--------------------------------------------------------------------------------
    # Now get the data in one chunk using an RPC call to retrieve
    #--------------------------------------------------------------------------------

    replay_data = self.data_retriever.retrieve(dataset_id)
    self.assertIsInstance(replay_data, Granule)
    rdt = RecordDictionaryTool.load_from_granule(replay_data)
    self.assertTrue((rdt['time'][:10] == np.arange(10)).all(), '%s' % rdt['time'][:])
    self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10, dtype='object')).all())

    #--------------------------------------------------------------------------------
    # Now try the streamed approach
    #--------------------------------------------------------------------------------
    replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
    self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
    log.info('Process ID: %s', process_id)

    replay_client = ReplayClient(process_id)
    #--------------------------------------------------------------------------------
    # Create the listening endpoint for the retriever to talk to
    #--------------------------------------------------------------------------------
    sub_id = self.pubsub_management.create_subscription(self.exchange_space_name, stream_ids=[replay_stream_id])
    self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
    self.pubsub_management.activate_subscription(sub_id)
    self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
    subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    self.data_retriever.start_replay_agent(self.replay_id)

    self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
    replay_client.start_replay()

    self.assertTrue(self.event.wait(10))

    self.data_retriever.cancel_replay_agent(self.replay_id)

    #--------------------------------------------------------------------------------
    # Test the slicing capabilities
    #--------------------------------------------------------------------------------
    granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa': slice(0, 5)})
    rdt = RecordDictionaryTool.load_from_granule(granule)
    b = rdt['time'] == np.arange(5)
    self.assertTrue(b.all() if not isinstance(b, bool) else b)
def get_array_type(self, parameter_type=None, encoding=None):
    # String-like encodings carry no inner dtype; store them unencoded.
    if encoding in ('str', '', 'opaque'):
        encoding = None
    return ArrayType(inner_encoding=encoding)
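# Note: unlike the earlier variant in this section, which raises on
# unrecognized encodings and assigns dtype-specific fill values, this one
# simply drops string-like encodings and lets ArrayType apply its own
# defaults. Illustrative calls (hypothetical):
#
#   self.get_array_type(encoding='opaque')   # -> ArrayType(inner_encoding=None)
#   self.get_array_type(encoding='float32')  # -> ArrayType(inner_encoding='float32')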
def get_string_array_lookup_value(self, value):
    return self._placeholder(
        value,
        lambda placeholder: ParameterContext(
            name=placeholder,
            param_type=SparseConstantType(base_type=ArrayType())))
def get_array_lookup_value(self, value):
    return self._placeholder(
        value,
        lambda placeholder: ParameterContext(
            name=placeholder,
            param_type=SparseConstantType(base_type=ArrayType(
                inner_encoding='float64', inner_fill_value=-9999.))))
def build_param_contexts(self):
    context_ids = []

    t_ctxt = ParameterContext(
        'time',
        param_type=QuantityType(value_encoding=np.dtype('int64')))
    t_ctxt.uom = 'seconds since 1970-01-01'
    context_ids.append(
        self.dataset_management.create_parameter_context(
            name='time', parameter_context=t_ctxt.dump()))

    ut_ctxt = ParameterContext(
        'upload_time',
        param_type=QuantityType(value_encoding=np.dtype('int64')))
    ut_ctxt.uom = 'seconds since 1970-01-01'
    context_ids.append(
        self.dataset_management.create_parameter_context(
            name='upload_time', parameter_context=ut_ctxt.dump()))

    # The velocity, magnetometer, and attitude fields are all arrays with
    # unknown units, so build them in a loop.
    for name in ('VelA', 'VelB', 'VelC', 'VelD',
                 'Mx', 'My', 'Mz', 'Pitch', 'Roll'):
        ctxt = ParameterContext(name, param_type=ArrayType())
        ctxt.uom = 'unknown'
        context_ids.append(
            self.dataset_management.create_parameter_context(
                name=name, parameter_context=ctxt.dump()))

    return context_ids
def build_contexts():
    '''
    Builds the relevant parameter context objects
    '''
    contexts = []

    cond_ctxt = ParameterContext(
        'conductivity',
        param_type=QuantityType(value_encoding=np.float32))
    cond_ctxt.uom = 'unknown'
    cond_ctxt.fill_value = 0.0
    contexts.append(cond_ctxt)

    pres_ctxt = ParameterContext(
        'pressure',
        param_type=QuantityType(value_encoding=np.float32))
    pres_ctxt.uom = 'Pascal'
    pres_ctxt.fill_value = 0.0
    contexts.append(pres_ctxt)

    sal_ctxt = ParameterContext(
        'salinity',
        param_type=QuantityType(value_encoding=np.float32))
    sal_ctxt.uom = 'PSU'
    sal_ctxt.fill_value = 0.0
    contexts.append(sal_ctxt)

    den_ctxt = ParameterContext(
        'density',
        param_type=QuantityType(value_encoding=np.float32))
    den_ctxt.uom = 'kg/m3'
    den_ctxt.fill_value = 0.0
    contexts.append(den_ctxt)

    temp_ctxt = ParameterContext(
        'temp',
        param_type=QuantityType(value_encoding=np.float32))
    temp_ctxt.uom = 'degree_Celsius'
    temp_ctxt.fill_value = 0.0
    contexts.append(temp_ctxt)

    t_ctxt = ParameterContext(
        'time',
        param_type=QuantityType(value_encoding=np.int64))
    t_ctxt.uom = 'seconds since 1970-01-01'
    t_ctxt.fill_value = 0
    contexts.append(t_ctxt)

    lat_ctxt = ParameterContext(
        'lat',
        param_type=QuantityType(value_encoding=np.float32))
    lat_ctxt.axis = AxisTypeEnum.LAT
    lat_ctxt.uom = 'degree_north'
    lat_ctxt.fill_value = 0.0
    contexts.append(lat_ctxt)

    lon_ctxt = ParameterContext(
        'lon',
        param_type=QuantityType(value_encoding=np.float32))
    lon_ctxt.axis = AxisTypeEnum.LON
    lon_ctxt.uom = 'degree_east'
    lon_ctxt.fill_value = 0.0
    contexts.append(lon_ctxt)

    raw_ctxt = ParameterContext('raw', param_type=ArrayType())
    raw_ctxt.description = 'raw binary string values'
    raw_ctxt.uom = 'utf-8 byte string'
    raw_ctxt.fill_value = 0
    contexts.append(raw_ctxt)

    port_ts_ctxt = ParameterContext(
        name='port_timestamp',
        param_type=QuantityType(value_encoding=np.float64))
    port_ts_ctxt._derived_from_name = 'time'
    port_ts_ctxt.uom = 'seconds'
    port_ts_ctxt.fill_value = -1
    contexts.append(port_ts_ctxt)

    driver_ts_ctxt = ParameterContext(
        name='driver_timestamp',
        param_type=QuantityType(value_encoding=np.float64))
    driver_ts_ctxt._derived_from_name = 'time'
    driver_ts_ctxt.uom = 'seconds'
    driver_ts_ctxt.fill_value = -1
    contexts.append(driver_ts_ctxt)

    internal_ts_ctxt = ParameterContext(
        name='internal_timestamp',
        param_type=QuantityType(value_encoding=np.float64))
    internal_ts_ctxt._derived_from_name = 'time'
    internal_ts_ctxt.uom = 'seconds'
    internal_ts_ctxt.fill_value = -1
    contexts.append(internal_ts_ctxt)

    timer_num_ctxt = ParameterContext(
        name='timer',
        param_type=QuantityType(value_encoding=np.float64))
    timer_num_ctxt.fill_value = -1
    contexts.append(timer_num_ctxt)

    serial_num_ctxt = ParameterContext(
        name='serial_num',
        param_type=QuantityType(value_encoding=np.int32))
    serial_num_ctxt.fill_value = -1
    contexts.append(serial_num_ctxt)

    count_ctxt = ParameterContext(
        name='counts',
        param_type=QuantityType(value_encoding=np.uint64))
    count_ctxt.fill_value = -1
    contexts.append(count_ctxt)

    checksum_ctxt = ParameterContext(
        name='checksum',
        param_type=QuantityType(value_encoding=np.int32))
    checksum_ctxt.fill_value = -1
    contexts.append(checksum_ctxt)

    pref_ts_ctxt = ParameterContext(
        name='preferred_timestamp',
        param_type=QuantityType(value_encoding=np.uint64))
    pref_ts_ctxt.description = 'name of preferred timestamp'
    pref_ts_ctxt.fill_value = 0
    contexts.append(pref_ts_ctxt)

    # TODO: This should probably be of type CategoryType when implemented
    qual_flag_ctxt = ParameterContext(name='quality_flag', param_type=ArrayType())
    qual_flag_ctxt.description = 'flag indicating quality'
    qual_flag_ctxt.fill_value = None
    contexts.append(qual_flag_ctxt)

    viz_ts_ctxt = ParameterContext(
        name='viz_timestamp',
        param_type=QuantityType(value_encoding=np.float64))
    viz_ts_ctxt._derived_from_name = 'time'
    viz_ts_ctxt.uom = 'seconds'
    viz_ts_ctxt.fill_value = -1
    contexts.append(viz_ts_ctxt)

    viz_prod_type_ctxt = ParameterContext(name='viz_product_type', param_type=ArrayType())
    viz_prod_type_ctxt.fill_value = None
    contexts.append(viz_prod_type_ctxt)

    image_obj_ctxt = ParameterContext(name='image_obj', param_type=ArrayType())
    image_obj_ctxt.fill_value = None
    contexts.append(image_obj_ctxt)

    image_name_ctxt = ParameterContext(name='image_name', param_type=ArrayType())
    contexts.append(image_name_ctxt)

    content_type_ctxt = ParameterContext(name='content_type', param_type=ArrayType())
    contexts.append(content_type_ctxt)

    gdt_ctxt = ParameterContext(name='google_dt_components', param_type=RecordType())
    contexts.append(gdt_ctxt)

    mpl_ctxt = ParameterContext(name='mpl_graph', param_type=RecordType())
    contexts.append(mpl_ctxt)

    dummy_ctxt = ParameterContext(
        name='dummy',
        param_type=QuantityType(value_encoding=np.int64))
    contexts.append(dummy_ctxt)

    return contexts
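# A minimal sketch of how the contexts built above might be assembled into a
# coverage-model ParameterDictionary. The import path and the choice of
# 'time' as the temporal parameter are assumptions, not taken from this
# module:
#
#   from coverage_model.parameter import ParameterDictionary
#
#   pdict = ParameterDictionary()
#   for ctxt in build_contexts():
#       # Mark the 'time' context as the temporal axis (assumed convention).
#       pdict.add_context(ctxt, is_temporal=(ctxt.name == 'time'))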
def test_replay_pause(self):
    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(
        pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext('binary', param_type=ArrayType())
    context_ids.append(
        self.dataset_management.create_parameter_context(
            'binary', bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext('records', param_type=RecordType())
    context_ids.append(
        self.dataset_management.create_parameter_context(
            'records', rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary(
        'replay_pdict',
        parameter_context_ids=context_ids,
        temporal_context='time')

    stream_def_id = self.pubsub_management.create_stream_definition(
        'replay_stream', parameter_dictionary_id=pdict_id)
    replay_stream, replay_route = self.pubsub_management.create_stream(
        'replay', 'xp1', stream_definition_id=stream_def_id)
    dataset_id = self.create_dataset(pdict_id)
    scov = DatasetManagementService._get_coverage(dataset_id)

    bb = CoverageCraft(scov)
    bb.rdt['time'] = np.arange(100)
    bb.rdt['temp'] = np.random.random(100) + 30
    bb.sync_with_granule()
    # This invalidates the coverage for multi-host configurations
    DatasetManagementService._persist_coverage(dataset_id, bb.coverage)

    # Set up the subscriber to verify the data
    subscriber = StandaloneStreamSubscriber(
        self.exchange_space_name, self.validate_granule_subscription)
    xp = self.container.ex_manager.create_xp('xp1')
    self.queue_buffer.append(self.exchange_space_name)
    subscriber.start()
    subscriber.xn.bind(replay_route.routing_key, xp)

    # Set up the replay agent and the client wrapper

    # 1) Define the replay (dataset and stream to publish on)
    self.replay_id, process_id = self.data_retriever.define_replay(
        dataset_id=dataset_id, stream_id=replay_stream)
    # 2) Make a client to interact with the process (optionally provide a process to bind with)
    replay_client = ReplayClient(process_id)
    # 3) Start the agent (launch the process)
    self.data_retriever.start_replay_agent(self.replay_id)
    # 4) Start replaying...
    replay_client.start_replay()

    # Wait till we get some granules
    self.assertTrue(self.event.wait(5))

    # We got granules; pause the replay, clear the queue, and allow the
    # process to finish consuming
    replay_client.pause_replay()
    gevent.sleep(1)
    subscriber.xn.purge()
    self.event.clear()

    # Make sure there are no remaining messages being consumed
    self.assertFalse(self.event.wait(1))

    # Resume the replay and wait until we start getting granules again
    replay_client.resume_replay()
    self.assertTrue(self.event.wait(5))

    # Stop the replay, clear the queues
    replay_client.stop_replay()
    gevent.sleep(1)
    subscriber.xn.purge()
    self.event.clear()

    # Make sure that it did indeed stop
    self.assertFalse(self.event.wait(1))

    subscriber.stop()
def get_array_type(parameter_type=None):
    return ArrayType()