def get_last_values(cls, dataset_id):
    '''
    get_last_values
    Loads the coverage for dataset_id, syncs a CoverageCraft record
    dictionary against it with tdoa=slice(-1, None) and returns the
    resulting granule.
    '''
    craft = CoverageCraft(DatasetManagementService._get_coverage(dataset_id))
    # presumably selects only the final entry of the temporal domain -- TODO confirm
    last_entry = slice(-1, None)
    craft.sync_rdt_with_coverage(tdoa=last_entry)
    return craft.to_granule()
def get_last_granule(cls, container, dataset_id):
    '''
    get_last_granule
    Looks up one document for dataset_id in the dataset's datastore view
    (descending, limit 1) and builds a granule from the coverage starting at
    that document's 'ts_create' value.

    Raises NotFound when the view yields no rows or the first row is None.
    Returns None when the first row carries no 'doc' entry.
    '''
    dataset = DatasetManagementServiceClient().read_dataset(dataset_id)
    store_name = dataset.datastore_name
    view = dataset.view_name
    datastore = container.datastore_manager.get_datastore(
        store_name, DataStore.DS_PROFILE.SCIDATA)

    # Key range is bounded by this dataset id; a single row with its document.
    query_opts = {
        'start_key': [dataset_id, {}],
        'end_key': [dataset_id, 0],
        'descending': True,
        'limit': 1,
        'include_docs': True,
    }
    rows = datastore.query_view(view, opts=query_opts)
    if not rows or rows[0] is None:
        raise NotFound('A granule could not be located.')

    doc = rows[0].get('doc')
    if doc is None:
        return None

    # A missing 'ts_create' falls back to 0.
    created = float(doc.get('ts_create', 0))

    craft = CoverageCraft(DatasetManagementService._get_coverage(dataset_id))
    craft.sync_rdt_with_coverage(start_time=created, end_time=None)
    return craft.to_granule()
def publish_hifi(self, stream_id, offset=0):
    '''
    publish_hifi
    Publishes one granule of ten samples on stream_id. The 'time' values are
    0..9 shifted by offset * 10 and the 'temp' values are twice the time
    values.
    '''
    publisher = SimpleStreamPublisher.new_publisher(
        self.container, self.exchange_point_name, stream_id)

    craft = CoverageCraft()
    shift = offset * 10
    craft.rdt["time"] = np.arange(10) + shift
    craft.rdt["temp"] = (np.arange(10) + shift) * 2

    publisher.publish(craft.to_granule())
def test_to_coverage(self):
    # Sync a fresh CoverageCraft with the sample granule and check that the
    # resulting coverage reports time values 0..19.
    sample = self.sample_granule()
    craft = CoverageCraft()
    craft.sync_with_granule(sample)

    observed_times = craft.coverage.get_time_values()
    expected_times = np.arange(20)
    self.assertTrue((observed_times == expected_times).all())
def execute_retrieve(self):
    '''
    execute_retrieve
    Performs the retrieval for self.dataset_id and hands the granule back to
    the caller as a return value instead of publishing it on a stream.
    '''
    craft = CoverageCraft(
        DatasetManagementService._get_coverage(self.dataset_id))
    #@todo: add bounds checking to ensure the dataset being retrieved is not too large
    craft.sync_rdt_with_coverage(
        start_time=self.start_time,
        end_time=self.end_time,
        parameters=self.parameters,
    )
    return craft.to_granule()
def sample_granule(self):
    '''
    sample_granule
    Builds a 20-sample granule named 'sample': 'time' runs 0..19 and every
    other field holds a single repeated value.
    '''
    sample_count = 20
    pdict = CoverageCraft.create_parameters()
    rdt = RecordDictionaryTool(param_dictionary=pdict)

    rdt['time'] = np.arange(sample_count)

    # Remaining fields are constant-filled; assignment order is kept as-is.
    constant_fields = (
        ('temp', 5),
        ('conductivity', 10),
        ('lat', 0),
        ('lon', 0),
        ('depth', 0),
        ('data', 0x01),
    )
    for field_name, fill_value in constant_fields:
        rdt[field_name] = np.array([fill_value] * sample_count)

    return build_granule('sample', param_dictionary=pdict, record_dictionary=rdt)
def add_granule(self, stream_id, granule):
    '''
    add_granule
    Syncs the granule into the coverage associated with stream_id and
    persists that coverage. Logs an error and returns without persisting
    when no dataset or no coverage can be resolved for the stream.
    '''
    # ---- Resolve the dataset and coverage for this stream ----
    dataset_id = self.get_dataset(stream_id)
    if not dataset_id:
        log.error('No dataset could be determined on this stream: %s', stream_id)
        return

    coverage = self.get_coverage(stream_id)
    if not coverage:
        log.error('Could not persist coverage from granule, coverage is None')
        return

    # ---- Sync the granule into the coverage, then persist ----
    CoverageCraft(coverage).sync_with_granule(granule)
    DatasetManagementService._persist_coverage(dataset_id, coverage)
def test_replay_pause(self):
    # Exercises start / pause / resume / stop of a replay and checks that
    # granules arrive only while the replay is running.
    dsm = self.dataset_management

    # Start from the precompiled parameter dictionary with basic ctd fields.
    base_pdict_id = dsm.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    context_ids = dsm.read_parameter_contexts(base_pdict_id, id_only=True)

    # Add a field that supports binary data input, then another one that
    # supports dictionary elements.
    for ctx_name, type_factory in (('binary', ArrayType), ('records', RecordType)):
        extra_context = ParameterContext(ctx_name, param_type=type_factory())
        context_ids.append(
            dsm.create_parameter_context(ctx_name, extra_context.dump()))

    pdict_id = dsm.create_parameter_dictionary(
        'replay_pdict',
        parameter_context_ids=context_ids,
        temporal_context='time')

    stream_def_id = self.pubsub_management.create_stream_definition(
        'replay_stream', parameter_dictionary_id=pdict_id)
    replay_stream, replay_route = self.pubsub_management.create_stream(
        'replay', 'xp1', stream_definition_id=stream_def_id)
    dataset_id = self.create_dataset(pdict_id)

    # Seed the dataset's coverage with 100 samples.
    craft = CoverageCraft(
        DatasetManagementService._get_simplex_coverage(dataset_id))
    craft.rdt['time'] = np.arange(100)
    craft.rdt['temp'] = np.random.random(100) + 30
    craft.sync_with_granule()
    # This invalidates it for multi-host configurations
    DatasetManagementService._persist_coverage(dataset_id, craft.coverage)

    # Set up the subscriber to verify the data
    subscriber = StandaloneStreamSubscriber(
        self.exchange_space_name, self.validate_granule_subscription)
    xp = self.container.ex_manager.create_xp('xp1')
    self.queue_buffer.append(self.exchange_space_name)
    subscriber.start()
    subscriber.xn.bind(replay_route.routing_key, xp)

    def expect_quiet_after_drain():
        # Let the process finish consuming, purge the queue, reset the
        # event, then verify that no further granule shows up.
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()
        self.assertFalse(self.event.wait(1))

    # Set up the replay agent and the client wrapper
    # 1) Define the replay (dataset and stream to publish on)
    self.replay_id, process_id = self.data_retriever.define_replay(
        dataset_id=dataset_id, stream_id=replay_stream)
    # 2) Make a client to interact with the process (optionally provide it
    #    a process to bind with)
    replay_client = ReplayClient(process_id)
    # 3) Start the agent (launch the process)
    self.data_retriever.start_replay_agent(self.replay_id)
    # 4) Start replaying...
    replay_client.start_replay()

    # Wait till we get some granules
    self.assertTrue(self.event.wait(5))

    # We got granules: pause the replay and make sure nothing is still
    # being consumed.
    replay_client.pause_replay()
    expect_quiet_after_drain()

    # Resume the replay and wait until we start getting granules again
    replay_client.resume_replay()
    self.assertTrue(self.event.wait(5))

    # Stop the replay and make sure that it did indeed stop
    replay_client.stop_replay()
    expect_quiet_after_drain()

    subscriber.stop()
def _trigger_func(self, stream_id):
    '''
    _trigger_func
    Publishes a ten-sample granule every self.interval seconds until
    self.finished is set. Each pass advances the 'time' values by ten.
    The stream_id argument is not read by this method.
    '''
    length = 10
    pass_index = 0
    while not self.finished.is_set():
        craft = CoverageCraft()
        craft.rdt['time'] = numpy.arange(length) + pass_index * length
        craft.rdt['temp'] = numpy.random.random(length) * 10
        for zero_field in ('lat', 'lon', 'depth'):
            craft.rdt[zero_field] = numpy.array([0] * length)
        craft.rdt['conductivity'] = numpy.random.random(length) * 10
        # Simulates random bytes
        craft.rdt['data'] = numpy.random.randint(0, 255, length)
        craft.sync_with_granule()

        self.publish(craft.to_granule())
        gevent.sleep(self.interval)
        pass_index += 1
def test_replay_pause(self):
    # Exercises start / pause / resume / stop of a replay and checks that
    # granules arrive only while the replay is running.
    dsm = self.dataset_management
    pubsub = self.pubsub_management

    # Start from the precompiled parameter dictionary with basic ctd fields.
    ctd_pdict_id = dsm.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    context_ids = dsm.read_parameter_contexts(ctd_pdict_id, id_only=True)

    # Add a field that supports binary data input, then another one that
    # supports dictionary elements.
    for ctx_name, type_factory in (('binary', ArrayType), ('records', RecordType)):
        new_context = ParameterContext(ctx_name, param_type=type_factory())
        context_ids.append(
            dsm.create_parameter_context(ctx_name, new_context.dump()))

    pdict_id = dsm.create_parameter_dictionary(
        'replay_pdict',
        parameter_context_ids=context_ids,
        temporal_context='time')

    stream_def_id = pubsub.create_stream_definition(
        'replay_stream', parameter_dictionary_id=pdict_id)
    replay_stream, replay_route = pubsub.create_stream(
        'replay', 'xp1', stream_definition_id=stream_def_id)
    dataset_id = self.create_dataset(pdict_id)

    # Seed the dataset's coverage with 100 samples.
    craft = CoverageCraft(DatasetManagementService._get_coverage(dataset_id))
    craft.rdt['time'] = np.arange(100)
    craft.rdt['temp'] = np.random.random(100) + 30
    craft.sync_with_granule()
    # This invalidates it for multi-host configurations
    DatasetManagementService._persist_coverage(dataset_id, craft.coverage)

    # Set up the subscriber to verify the data
    subscriber = StandaloneStreamSubscriber(
        self.exchange_space_name, self.validate_granule_subscription)
    xp = self.container.ex_manager.create_xp('xp1')
    self.queue_buffer.append(self.exchange_space_name)
    subscriber.start()
    subscriber.xn.bind(replay_route.routing_key, xp)

    def drain_and_check_silence():
        # Let the process finish consuming, purge the queue, reset the
        # event, then verify that no further granule shows up.
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()
        self.assertFalse(self.event.wait(1))

    # Set up the replay agent and the client wrapper
    # 1) Define the replay (dataset and stream to publish on)
    self.replay_id, process_id = self.data_retriever.define_replay(
        dataset_id=dataset_id, stream_id=replay_stream)
    # 2) Make a client to interact with the process (optionally provide it
    #    a process to bind with)
    replay_client = ReplayClient(process_id)
    # 3) Start the agent (launch the process)
    self.data_retriever.start_replay_agent(self.replay_id)
    # 4) Start replaying...
    replay_client.start_replay()

    # Wait till we get some granules
    self.assertTrue(self.event.wait(5))

    # We got granules: pause the replay and make sure nothing is still
    # being consumed.
    replay_client.pause_replay()
    drain_and_check_silence()

    # Resume the replay and wait until we start getting granules again
    replay_client.resume_replay()
    self.assertTrue(self.event.wait(5))

    # Stop the replay and make sure that it did indeed stop
    replay_client.stop_replay()
    drain_and_check_silence()

    subscriber.stop()
def test_coverage_ingest(self):
    # Publishes two 20-sample granules on a persisted stream and checks,
    # after each one and once more through the data retriever, that the
    # stored 'time' values form a contiguous 0..N-1 range.
    stream_id = self.pubsub_management.create_stream()
    dataset_id = self.create_dataset()

    # NOTE(review): the original author's comment attributes this call to a
    # bug workaround; its return value is not used here.
    self.get_datastore(dataset_id)

    ingestion_config_id = self.get_ingestion_config()
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id,
        ingestion_configuration_id=ingestion_config_id,
        dataset_id=dataset_id,
    )

    def check_persisted_times(expected_count):
        # Reload the coverage and compare its time axis to 0..expected_count-1.
        stored = DatasetManagementService._get_coverage(dataset_id)
        reader = CoverageCraft(stored)
        reader.sync_rdt_with_coverage()
        matches = reader.rdt["time"] == np.arange(expected_count)
        self.assertTrue(matches.all())

    # First granule: time 0..19
    first_craft = CoverageCraft()
    first_craft.rdt["time"] = np.arange(20)
    first_craft.rdt["temp"] = np.random.random(20) * 10
    first_craft.sync_with_granule()
    first_granule = first_craft.to_granule()

    publisher = SimpleStreamPublisher.new_publisher(
        self.container, self.exchange_point_name, stream_id)
    publisher.publish(first_granule)

    self.wait_until_we_have_enough_granules(dataset_id, 1)
    check_persisted_times(20)

    # Second granule: time 20..39
    second_craft = CoverageCraft()
    second_craft.rdt["time"] = np.arange(20) + 20
    second_craft.rdt["temp"] = np.random.random(20) * 10
    second_craft.sync_with_granule()
    publisher.publish(second_craft.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 2)
    check_persisted_times(40)

    # The retrieval path must report the same 40 time values.
    retrieved = self.data_retriever.retrieve(dataset_id)
    unpacker = CoverageCraft()
    unpacker.sync_rdt_with_granule(retrieved)
    matches = unpacker.rdt["time"] == np.arange(40)
    self.assertTrue(matches.all())