def test_ingestion_pause(self):
    ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    ingestion_config_id = self.get_ingestion_config()
    self.start_ingestion(ctd_stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, ctd_stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    publisher.publish(rdt.to_granule())
    self.assertTrue(monitor.wait())
    granule = self.data_retriever.retrieve(dataset_id)

    self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

    monitor.event.clear()
    rdt['time'] = np.arange(10, 20)
    publisher.publish(rdt.to_granule())
    self.assertFalse(monitor.event.wait(1))

    self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)
    self.assertTrue(monitor.wait())

    granule = self.data_retriever.retrieve(dataset_id)
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))
def loclrng_checks(self):
    ref = 'GP03FLMA-RI001-06-CTDMOG999'
    self.stream_def_id = self.create_stream_definition('local range check',
            parameter_dictionary_id=self.pdict_id,
            stream_configuration={'reference_designator': ref})
    self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    self.rdt['time'] = np.arange(10)
    lon, lat, pressure = [-124.832179, 46.436926, 37.5]
    self.rdt['lon'] = lon
    self.rdt['lat'] = lat
    self.rdt['pressure'] = [37.5] * 10
    self.rdt['temp'] = [30] * 10
    self.rdt.fetch_lookup_values()

    doc = self.svm.read_value('lrt_%s_TEMPWAT' % ref)
    self.assertEquals(doc['dims'], ['lon', 'lat', 'pressure'])
    doc = self.svm.read_value('lrt_%s_PRESWAT' % ref)
    self.assertEquals(doc['dims'], ['temp'])

    np.testing.assert_array_equal(self.rdt['tempwat_loclrng_qc'], [1] * 10)

    self.rdt['pressure'] = [37.5] * 9 + [75.]
    self.rdt['temp'] = [30] * 9 + [18]
    np.testing.assert_array_equal(self.rdt['tempwat_loclrng_qc'], [1] * 9 + [0])

    self.rdt['temp'] = [15] * 5 + [35] * 5
    self.rdt['pressure'] = [10] * 10
    np.testing.assert_array_equal(self.rdt['preswat_loclrng_qc'], [1] * 5 + [0] * 5)
def new_rdt(self, ref='QCTEST'):
    # Use the ref argument rather than a hard-coded designator so callers can override it
    self.stream_def_id = self.create_stream_definition(uuid4().hex,
            parameter_dictionary_id=self.pdict_id,
            stream_configuration={'reference_designator': ref})
    self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
def test_qc_events(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_qc_pdict()

    stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    ingestion_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    config = DotDict()

    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
    self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

    publisher = StandaloneStreamPublisher(stream_id, route)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.arange(10) * 3

    verified = Event()

    def verification(event, *args, **kwargs):
        self.assertEquals(event.qc_parameter, 'temp_qc')
        self.assertEquals(event.temporal_value, 7)
        verified.set()

    es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    publisher.publish(rdt.to_granule())
    self.assertTrue(verified.wait(10))
def create_test_granules(self, buffer_data=False):
    """
    Generate test granules from particles.  If buffer_data is set to true then try to
    buffer data into a granule.  If the particle has the new sequence flag set then a
    new granule will be generated.  This method emulates the agent_stream_publisher
    module.
    :return: list of granules generated.
    """
    base_timestamp = 3583861263.0
    connection_index = 0

    particles = []
    particles.append(self.get_particle(base_timestamp, 10.5914, 161.06, 4.1870, 2693.0))
    particles.append(self.get_particle(base_timestamp + 1, 10.5915, 161.07, 4.1871, 2693.1))
    particles.append(self.get_particle(base_timestamp + 2, 10.5916, 161.08, 4.1872, 2693.2))
    particles.append(self.get_particle(base_timestamp + 3, 10.5917, 161.09, 4.1873, 2693.3, True))
    particles.append(self.get_particle(base_timestamp + 4, 10.5918, 161.10, 4.1874, 2693.4))

    data_groups = []
    result_granules = []
    data_groups_index = 0

    for particle in particles:
        # If we need a new connection then start a new group, but only if we have found
        # something in the current group
        if (particle.get('new_sequence', False) or buffer_data == False) and \
           (len(data_groups) > 0 and len(data_groups[data_groups_index]) > 0):
            data_groups_index += 1

        if len(data_groups) <= data_groups_index:
            data_groups.append([])

        data_groups[data_groups_index].append(particle)

    log.debug("Granules to create: %s", len(data_groups))

    for data in data_groups:
        connection_id = uuid.uuid4()
        connection_index += 1
        rdt = RecordDictionaryTool(param_dictionary=self.get_param_dict())
        rdt = populate_rdt(rdt, data)
        g = rdt.to_granule(data_producer_id='agent_res_id',
                           connection_id=connection_id.hex,
                           connection_index=str(connection_index))
        result_granules.append(g)

    return result_granules
def create_granule(self, stream_name, param_dict_name, particle_list):
    pd_id = self.dataset_management.read_parameter_dictionary_by_name(param_dict_name, id_only=True)
    stream_def_id = self.pubsub_client.create_stream_definition(name=stream_name, parameter_dictionary_id=pd_id)
    stream_def = self.pubsub_client.read_stream_definition(stream_def_id)
    rdt = RecordDictionaryTool(stream_definition=stream_def)
    rdt = populate_rdt(rdt, particle_list)
    log.trace("RDT: %s", str(rdt))
    g = rdt.to_granule(data_producer_id='fake_agent_id')
    return g
def publish_hifi(self, stream_id, stream_route, offset=0):
    pub = StandaloneStreamPublisher(stream_id, stream_route)

    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10) + (offset * 10)
    rdt['temp'] = np.arange(10) + (offset * 10)
    pub.publish(rdt.to_granule())
def glblrng_checks(self):
    ref = 'CE01ISSM-MF005-01-CTDBPC999'
    self.stream_def_id = self.create_stream_definition('global range check',
            parameter_dictionary_id=self.pdict_id,
            stream_configuration={'reference_designator': ref})
    self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    self.rdt['time'] = np.arange(10)
    self.rdt['temp'] = [-10] * 5 + [4] * 5
    self.rdt.fetch_lookup_values()
    np.testing.assert_array_equal(self.rdt['tempwat_glblrng_qc'], [0] * 5 + [1] * 5)
def publish_loop(self):
    t_i = 0
    while not self.finished.is_set():
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id)
        rdt['time'] = numpy.arange(10) + t_i * 10
        rdt['temp'] = numpy.random.random_sample(10) * (30 - 0) + 0
        self.publish(rdt.to_granule())
        gevent.sleep(self.interval)
        t_i += 1
def spketst_checks(self):
    ref = 'GP02HYPM-SP001-04-CTDPF0999'
    self.stream_def_id = self.create_stream_definition('spike test check',
            parameter_dictionary_id=self.pdict_id,
            stream_configuration={'reference_designator': ref})
    self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    self.rdt['time'] = np.arange(20)
    self.rdt['temp'] = [13] * 9 + [100] + [13] * 10
    self.rdt.fetch_lookup_values()
    np.testing.assert_array_equal(self.rdt['tempwat_spketst_qc'], [1] * 9 + [0] + [1] * 10)
def stuckvl_checks(self):
    ref = 'GP02HYPM-SP001-04-CTDPF0999'
    self.stream_def_id = self.create_stream_definition('stuck value checks',
            parameter_dictionary_id=self.pdict_id,
            stream_configuration={'reference_designator': ref})
    self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    self.rdt['time'] = np.arange(50)
    self.rdt['temp'] = [20] * 30 + range(20)
    self.rdt.fetch_lookup_values()
    np.testing.assert_array_equal(self.rdt['tempwat_stuckvl_qc'], [0] * 30 + [1] * 20)
def publish_loop(self):
    #@todo - add lots of comments in here
    while not self.finished.is_set():
        length = 10

        # Explicitly make these numpy arrays...
        c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])
        t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])
        p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)])
        lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])
        lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
        tvar = numpy.array([self.last_time + i for i in xrange(1, length + 1)])
        self.last_time = max(tvar)

        parameter_dictionary = self._create_parameter()
        rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

        # This is an example of using groups; it is not a normative statement about how to use groups
        rdt['temp'] = t
        rdt['conductivity'] = c
        rdt['pressure'] = p

        # Add a value sequence of raw bytes - not sure the type below is correct?
        with open('/dev/urandom', 'r') as rand:
            rdt['raw_fixed'] = numpy.array([rand.read(32) for i in xrange(length)], dtype='a32')

        # Add a value sequence of raw bytes - not sure the type below is correct?
        with open('/dev/urandom', 'r') as rand:
            rdt['raw_blob'] = numpy.array([rand.read(random.randint(1, 40)) for i in xrange(length)], dtype=object)

        rdt['time'] = tvar
        rdt['lat'] = lat
        rdt['lon'] = lon

        g = rdt.to_granule()
        log.info('Sending %d values!' % length)
        if isinstance(g, Granule):
            self.publish(g)

        gevent.sleep(self.interval)
def _publish_stream_buffer(self, stream_name):
    """
    Publish the buffered values for the named stream as a single granule.

    Example field names and driver output that end up in the stream buffers:

    ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw', 'internal_timestamp', 'time', 'lat', 'driver_timestamp']
    ['quality_flag', 'preferred_timestamp', 'temp', 'density', 'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp', 'time', 'lat', 'driver_timestamp', 'conductivity']

    {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "raw", "values": [{"binary": true, "value": "MzIuMzkxOSw5MS4wOTUxMiwgNzg0Ljg1MywgICA2LjE5OTQsIDE1MDUuMTc5LCAxOSBEZWMgMjAxMiwgMDA6NTI6Mjc=", "value_id": "raw"}]}', 'time': 1355878347.744123}
    {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 32.3919, "value_id": "temp"}, {"value": 91.09512, "value_id": "conductivity"}, {"value": 784.853, "value_id": "pressure"}]}', 'time': 1355878347.744127}

    {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'port_timestamp': [None], 'lon': [None], 'raw': ['-4.9733,16.02390, 539.527, 34.2719, 1506.862, 19 Dec 2012, 01:03:07'], 'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117]}
    {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'temp': [-4.9733], 'density': [None], 'port_timestamp': [None], 'lon': [None], 'salinity': [None], 'pressure': [539.527], 'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117], 'conductivity': [16.0239]}
    """
    try:
        buf_len = len(self._stream_buffers[stream_name])
        if buf_len == 0:
            return

        stream_def = self._stream_defs[stream_name]
        if isinstance(stream_def, str):
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
        else:
            rdt = RecordDictionaryTool(stream_definition=stream_def)

        publisher = self._publishers[stream_name]

        vals = []
        for x in xrange(buf_len):
            vals.append(self._stream_buffers[stream_name].pop())

        rdt = populate_rdt(rdt, vals)

        #log.info('Outgoing granule: %s',
        #         ['%s: %s' % (k, v) for k, v in rdt.iteritems()])
        #log.info('Outgoing granule preferred timestamp: %s' % rdt['preferred_timestamp'][0])
        #log.info('Outgoing granule destined for stream: %s', stream_name)

        g = rdt.to_granule(data_producer_id=self._agent.resource_id,
                           connection_id=self._connection_ID.hex,
                           connection_index=str(self._connection_index[stream_name]))

        publisher.publish(g)
        #log.info('Instrument agent %s published data granule on stream %s.',
        #         self._agent._proc_name, stream_name)
        #log.info('Connection id: %s, connection index: %i.',
        #         self._connection_ID.hex, self._connection_index[stream_name])
        self._connection_index[stream_name] += 1
    except:
        log.exception('Instrument agent %s could not publish data on stream %s.',
                      self._agent._proc_name, stream_name)
def publish_hifi(self, stream_id, stream_route, offset=0):
    '''
    Publish deterministic data
    '''
    pub = StandaloneStreamPublisher(stream_id, stream_route)

    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10) + (offset * 10)
    rdt['temp'] = np.arange(10) + (offset * 10)
    pub.publish(rdt.to_granule())
def test_ingestion_gap_analysis(self):
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)

    connection1 = uuid4().hex
    connection2 = uuid4().hex

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temp'] = [0]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index='0'))
    rdt['time'] = [1]
    rdt['temp'] = [1]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index='1'))
    rdt['time'] = [2]
    rdt['temp'] = [2]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index='3'))  # Gap, missed message
    rdt['time'] = [3]
    rdt['temp'] = [3]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index='3'))  # Gap, new connection
    rdt['time'] = [4]
    rdt['temp'] = [4]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index='4'))
    rdt['time'] = [5]
    rdt['temp'] = [5]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index='5'))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(6))
    np.testing.assert_array_equal(rdt['temp'], np.arange(6))

    return dataset_id
def publish_loop(self):
    t_i = 0
    while not self.finished.is_set():
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id)
        rdt['time'] = numpy.arange(10) + t_i * 10
        rdt['temp'] = numpy.random.random(10) * 10
        rdt['lat'] = numpy.array([0] * 10)
        rdt['lon'] = numpy.array([0] * 10)
        rdt['conductivity'] = numpy.random.random(10) * 10
        rdt['binary'] = numpy.array(['hi'] * 10, dtype='object')
        self.publish(rdt.to_granule())
        gevent.sleep(self.interval)
        t_i += 1
def publish_loop(self):
    #@todo - add lots of comments in here
    while not self.finished.is_set():
        length = 10

        # Explicitly make these numpy arrays...
        c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])
        t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])
        p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)])
        lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])
        lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
        tvar = numpy.array([self.last_time + i for i in xrange(1, length + 1)])
        self.last_time = max(tvar)

        parameter_dictionary = self._create_parameter()
        rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

        # This is an example of using groups; it is not a normative statement about how to use groups
        rdt['temp'] = t
        rdt['conductivity'] = c
        rdt['pressure'] = p

        # Add a value sequence of raw bytes - not sure the type below is correct?
        with open('/dev/urandom', 'r') as rand:
            rdt['raw_fixed'] = numpy.array([rand.read(32) for i in xrange(length)], dtype='a32')

        # Add a value sequence of raw bytes - not sure the type below is correct?
        with open('/dev/urandom', 'r') as rand:
            rdt['raw_blob'] = numpy.array([rand.read(random.randint(1, 40)) for i in xrange(length)], dtype=object)

        rdt['time'] = tvar
        rdt['lat'] = lat
        rdt['lon'] = lon

        g = rdt.to_granule()
        log.info('Sending %d values!' % length)
        if isinstance(g, Granule):
            self.publish(g)

        gevent.sleep(self.interval)
def create_test_granules(self, buffer_data=False):
    """
    Generate test granules from particles.  If buffer_data is set to true then try to
    buffer data into a granule.  If the particle has the new sequence flag set then a
    new granule will be generated.  This method emulates the agent_stream_publisher
    module.
    :return: list of granules generated.
    """
    base_timestamp = 3583861263.0
    connection_index = 0

    particles = []
    particles.append(self.get_particle(base_timestamp, 10.5914, 161.06, 4.1870, 2693.0))
    particles.append(self.get_particle(base_timestamp + 1, 10.5915, 161.07, 4.1871, 2693.1))
    particles.append(self.get_particle(base_timestamp + 2, 10.5916, 161.08, 4.1872, 2693.2))
    particles.append(self.get_particle(base_timestamp + 3, 10.5917, 161.09, 4.1873, 2693.3, True))
    particles.append(self.get_particle(base_timestamp + 4, 10.5918, 161.10, 4.1874, 2693.4))

    data_groups = []
    result_granules = []
    data_groups_index = 0

    for particle in particles:
        # If we need a new connection then start a new group, but only if we have found
        # something in the current group
        if (particle.get('new_sequence', False) or buffer_data == False) and \
           (len(data_groups) > 0 and len(data_groups[data_groups_index]) > 0):
            data_groups_index += 1

        if len(data_groups) <= data_groups_index:
            data_groups.append([])

        data_groups[data_groups_index].append(particle)

    log.debug("Granules to create: %s", len(data_groups))

    for data in data_groups:
        connection_id = uuid.uuid4()
        connection_index += 1
        rdt = RecordDictionaryTool(param_dictionary=self.get_param_dict())
        rdt = populate_rdt(rdt, data)
        g = rdt.to_granule(data_producer_id='agent_res_id',
                           connection_id=connection_id.hex,
                           connection_index=str(connection_index))
        result_granules.append(g)

    return result_granules
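# A minimal sketch (not part of the original suite) showing how the grouping logic in
# create_test_granules behaves.  The method name below is hypothetical; it assumes it is
# called from a test case where create_test_granules and get_particle are available.
# Only the fourth particle sets the new-sequence flag, so buffered publishing should
# group particles 1-3 and 4-5 into two granules, while unbuffered publishing should
# produce one granule per particle.
def check_granule_grouping_sketch(self):
    buffered = self.create_test_granules(buffer_data=True)
    self.assertEqual(len(buffered), 2)

    unbuffered = self.create_test_granules(buffer_data=False)
    self.assertEqual(len(unbuffered), 5)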
def test_replay_with_parameters(self):
    #--------------------------------------------------------------------------------
    # Create the configurations and the dataset
    #--------------------------------------------------------------------------------
    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext("binary", param_type=ArrayType())
    context_ids.append(self.dataset_management.create_parameter_context("binary", bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext("records", param_type=RecordType())
    context_ids.append(self.dataset_management.create_parameter_context("records", rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary(
        "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
    )

    stream_def_id = self.pubsub_management.create_stream_definition(
        "replay_stream", parameter_dictionary_id=pdict_id
    )

    stream_id, route = self.pubsub_management.create_stream(
        "replay_with_params", exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id
    )
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
    )

    #--------------------------------------------------------------------------------
    # Coerce the datastore into existence (beats race condition)
    #--------------------------------------------------------------------------------
    self.get_datastore(dataset_id)

    self.launch_producer(stream_id)

    self.wait_until_we_have_enough_granules(dataset_id, 40)

    query = {
        "start_time": 0 - 2208988800,
        "end_time": 20 - 2208988800,
        "stride_time": 2,
        "parameters": ["time", "temp"],
    }
    retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

    rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
    comp = np.arange(0, 20, 2) == rdt["time"]
    self.assertTrue(comp.all(), "%s" % rdt.pretty_print())
    self.assertEquals(set(rdt.iterkeys()), set(["time", "temp"]))

    extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=["time", "temp"])
    self.assertTrue(extents["time"] >= 20)
    self.assertTrue(extents["temp"] >= 20)

    self.streams.append(stream_id)
    self.stop_ingestion(stream_id)
def _extract_granule_data(self, granules):
    """
    Pull all data out of all granules and return a dict of values
    """
    result = []
    for granule in granules:
        group = []
        log.debug("Granule: %s", granule)
        rdt = RecordDictionaryTool.load_from_granule(granule)

        # Store the data from each record
        for key, value in rdt.iteritems():
            for i in range(0, len(value)):
                if len(group) <= i:
                    group.append({})
                group[i][key] = value[i]

                # Store the connection information for each record
                if not 'connection_index' in group[i]:
                    group[i]['connection_index'] = granule.connection_index

                if not 'connection_id' in group[i]:
                    group[i]['connection_id'] = granule.connection_id

        result += group

    log.debug("extracted granules: %s", pprint.pformat(result))
    return result
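# A hedged illustration (not part of the original class) of the structure returned by
# _extract_granule_data: one dict per record, holding that row's parameter values plus
# the connection metadata copied from the enclosing granule, roughly
#   [{'time': ..., 'temp': ..., 'connection_id': '<hex>', 'connection_index': '1'}, ...]
# The helper name below is hypothetical.
def log_extracted_records_sketch(self, granules):
    for record in self._extract_granule_data(granules):
        log.debug("record from connection %s/%s: %s",
                  record['connection_id'], record['connection_index'], record)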
def test_last_granule(self):
    #--------------------------------------------------------------------------------
    # Create the necessary configurations for the test
    #--------------------------------------------------------------------------------
    stream_id = self.pubsub_management.create_stream()
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset()
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
    )

    #--------------------------------------------------------------------------------
    # Create the datastore first,
    #--------------------------------------------------------------------------------
    self.get_datastore(dataset_id)

    self.publish_hifi(stream_id, 0)
    self.publish_hifi(stream_id, 1)

    self.wait_until_we_have_enough_granules(dataset_id, 2)  # I just need two

    replay_granule = self.data_retriever.retrieve_last_granule(dataset_id)

    rdt = RecordDictionaryTool.load_from_granule(replay_granule)

    comp = rdt["time"] == np.arange(10) + 10

    self.assertTrue(comp.all())
def test_stuck_value_test(self):
    TestQCFunctions.test_stuck_value_test(self)
    self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
    self.dataset_monitor.event.wait(10)
    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
    np.testing.assert_array_almost_equal(rdt['tempwat_stuckvl_qc'], [1, 1, 0, 0, 0, 0, 1, 1, 1, 1])
def check_localrange(self):
    log.info('check_localrange')
    TestQCFunctions.check_localrange(self)
    self.init_check()

    flagged = Event()

    def cb(event, *args, **kwargs):
        times = event.temporal_values
        if not event.qc_parameter == 'tempwat_loclrng_qc':
            return
        np.testing.assert_array_equal(times, np.array([
            3580144708.7555027,
            3580144709.7555027,
            3580144710.7555027,
            3580144711.7555027,
            3580144712.7555027
        ]))
        flagged.set()

    event_subscriber = EventSubscriber(event_type=OT.ParameterQCEvent, origin=self.dp_id, callback=cb, auto_delete=True)
    event_subscriber.start()
    self.addCleanup(event_subscriber.stop)

    self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
    self.dataset_monitor.event.wait(10)
    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
    np.testing.assert_array_almost_equal(rdt['tempwat_loclrng_qc'], [1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    self.assertTrue(flagged.wait(10))
def validate_granule_subscription(self, msg, route, stream_id):
    if msg == {}:
        return
    rdt = RecordDictionaryTool.load_from_granule(msg)
    log.info('%s', rdt.pretty_print())
    self.assertIsInstance(msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))
    self.event.set()
def check_global_range(self):
    TestQCFunctions.check_global_range(self)
    self.init_check()

    flagged = Event()

    def cb(event, *args, **kwargs):
        times = event.temporal_values
        self.assertEquals(times, [0.0, 7.0])
        flagged.set()

    event_subscriber = EventSubscriber(event_type=OT.ParameterQCEvent, origin=self.dp_id, callback=cb, auto_delete=True)
    event_subscriber.start()
    self.addCleanup(event_subscriber.stop)

    self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
    self.dataset_monitor.event.wait(10)
    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
    np.testing.assert_array_almost_equal(rdt['tempwat_glblrng_qc'], [0, 1, 1, 1, 1, 1, 1, 0])
    self.assertTrue(flagged.wait(10))
def test_repersist_data(self):
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)
    self.publish_hifi(stream_id, route, 0)
    self.publish_hifi(stream_id, route, 1)
    self.wait_until_we_have_enough_granules(dataset_id, 20)
    config_id = self.get_ingestion_config()
    self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)
    self.publish_hifi(stream_id, route, 2)
    self.publish_hifi(stream_id, route, 3)
    self.wait_until_we_have_enough_granules(dataset_id, 40)

    success = False
    with gevent.timeout.Timeout(5):
        while not success:
            replay_granule = self.data_retriever.retrieve(dataset_id)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(0, 40)
            if not isinstance(comp, bool):
                success = comp.all()
            gevent.sleep(1)
    self.assertTrue(success)
def test_replay_with_parameters(self):
    #--------------------------------------------------------------------------------
    # Create the configurations and the dataset
    #--------------------------------------------------------------------------------
    stream_id = self.pubsub_management.create_stream()
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset()
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
    )

    #--------------------------------------------------------------------------------
    # Coerce the datastore into existence (beats race condition)
    #--------------------------------------------------------------------------------
    self.get_datastore(dataset_id)

    self.launch_producer(stream_id)

    self.wait_until_we_have_enough_granules(dataset_id, 4)

    query = {"start_time": 0, "end_time": 20, "parameters": ["time", "temp"]}

    retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

    rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
    comp = np.arange(20) == rdt["time"]
    self.assertTrue(comp.all(), "%s" % rdt.pretty_print())
    self.assertEquals(set(rdt.iterkeys()), set(["time", "temp"]))
def test_repersist_data(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition(name='ctd', parameter_dictionary_id=pdict_id)
    stream_id, route = self.pubsub_management.create_stream(name='repersist', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
    self.get_datastore(dataset_id)
    self.publish_hifi(stream_id, route, 0)
    self.publish_hifi(stream_id, route, 1)
    self.wait_until_we_have_enough_granules(dataset_id, 2)
    self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
    self.publish_hifi(stream_id, route, 2)
    self.publish_hifi(stream_id, route, 3)
    self.wait_until_we_have_enough_granules(dataset_id, 4)

    success = False
    with gevent.timeout.Timeout(5):
        while not success:
            replay_granule = self.data_retriever.retrieve(dataset_id)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(0, 40)
            if not isinstance(comp, bool):
                success = comp.all()
            gevent.sleep(1)
    self.assertTrue(success)
def check_fill_values(self):
    log.info('check_fill_values')
    self.new_rdt()
    self.init_check()
    self.rdt['time'] = np.arange(5)
    self.rdt['temp'] = [12] * 5
    self.rdt.fetch_lookup_values()
    np.testing.assert_array_equal(self.rdt['tempwat_glblrng_qc'], [-99] * 5)
    np.testing.assert_array_equal(self.rdt['tempwat_spketst_qc'], [-99] * 5)
    np.testing.assert_array_equal(self.rdt['tempwat_stuckvl_qc'], [-99] * 5)
    np.testing.assert_array_equal(self.rdt['tempwat_trndtst_qc'], [-99] * 5)
    np.testing.assert_array_equal(self.rdt['tempwat_gradtst_qc'], [-99] * 5)

    self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
    self.dataset_monitor.event.wait(10)
    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
    np.testing.assert_array_equal(rdt['tempwat_glblrng_qc'], [-99] * 5)
    np.testing.assert_array_equal(rdt['tempwat_spketst_qc'], [-99] * 5)
    np.testing.assert_array_equal(rdt['tempwat_stuckvl_qc'], [-99] * 5)
    np.testing.assert_array_equal(rdt['tempwat_trndtst_qc'], [-99] * 5)
    np.testing.assert_array_equal(rdt['tempwat_gradtst_qc'], [-99] * 5)
def test_replay_by_time(self):
    log.info("starting test...")
    #--------------------------------------------------------------------------------
    # Create the necessary configurations for the test
    #--------------------------------------------------------------------------------
    stream_id = self.pubsub_management.create_stream()
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset()
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
    )

    #--------------------------------------------------------------------------------
    # Create the datastore first,
    #--------------------------------------------------------------------------------
    # There is sometimes a race condition between the services and the process for the
    # creation of the datastore and its instance; this ensures the datastore exists
    # before the process even subscribes to data.
    self.get_datastore(dataset_id)

    self.publish_fake_data(stream_id)

    self.wait_until_we_have_enough_granules(dataset_id, 2)  # I just need two

    replay_granule = self.data_retriever.retrieve(dataset_id, {"start_time": 0, "end_time": 6})

    rdt = RecordDictionaryTool.load_from_granule(replay_granule)

    comp = rdt["time"] == np.array([0, 1, 2, 3, 4, 5])

    try:
        log.info("Compared granule: %s", replay_granule.__dict__)
        log.info("Granule tax: %s", replay_granule.taxonomy.__dict__)
    except:
        pass
    self.assertTrue(comp.all())
def check_gradient(self):
    log.info('check_gradient')
    TestQCFunctions.check_gradient(self)
    self.init_check()
    self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
    self.dataset_monitor.event.wait(10)
    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
    np.testing.assert_array_equal(rdt['tempwat_gradtst_qc'], [1, 1, 0, 0, 1])
def check_trend(self):
    log.info('check_trend')
    TestQCFunctions.check_trend(self)
    self.init_check()
    self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
    self.assertTrue(self.dataset_monitor.wait())
    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
    np.testing.assert_array_almost_equal(rdt['tempwat_trndtst_qc'], [1] * 10)
def verifier():
    replay_granule = self.data_retriever.retrieve_last_granule(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(replay_granule)
    comp = rdt['time'] == np.arange(10) + 10
    if not isinstance(comp, bool):
        return comp.all()
    return False
def get_visualization_image(self, data_product_id='', visualization_parameters=None, callback=''):
    # Error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")
    if visualization_parameters == {}:
        visualization_parameters = None

    # Extract the retrieval related parameters. Definitely init all parameters first
    query = None
    if visualization_parameters:
        query = {'parameters': []}
        # Error check and damage control. Definitely need time
        if 'parameters' in visualization_parameters:
            if not 'time' in visualization_parameters['parameters']:
                visualization_parameters['parameters'].append('time')
            query['parameters'] = visualization_parameters['parameters']

        if 'stride_time' in visualization_parameters:
            query['stride_time'] = visualization_parameters['stride_time']
        if 'start_time' in visualization_parameters:
            query['start_time'] = visualization_parameters['start_time']
        if 'end_time' in visualization_parameters:
            query['end_time'] = visualization_parameters['end_time']

    # Get the dataset_id associated with the data_product. Need it to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, True)
    if ds_ids is None or not ds_ids:
        return None

    # Ideally just need the latest granule to figure out the list of images
    #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0], {'start_time': 0, 'end_time': 2})
    retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)

    if retrieved_granule is None:
        return None

    # Send the granule through the transform to get the matplotlib graphs
    mpl_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('graph_image_param_dict', id_only=True)
    mpl_stream_def = self.clients.pubsub_management.create_stream_definition('mpl', parameter_dictionary_id=mpl_pdict_id)
    mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(retrieved_granule, config=visualization_parameters, params=mpl_stream_def)

    if mpl_data_granule is None:
        return None

    mpl_rdt = RecordDictionaryTool.load_from_granule(mpl_data_granule)

    ret_dict = dict()
    ret_dict['content_type'] = (get_safe(mpl_rdt, "content_type"))[0]
    ret_dict['image_name'] = (get_safe(mpl_rdt, "image_name"))[0]
    # Reason for encoding as a base64 string: otherwise message pack complains about the bit stream
    ret_dict['image_obj'] = base64.encodestring((get_safe(mpl_rdt, "image_obj"))[0])

    if callback == '':
        return ret_dict
    else:
        return callback + "(" + simplejson.dumps(ret_dict) + ")"
def verify_points():
    replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)
    rdt = RecordDictionaryTool.load_from_granule(replay_granule)
    comp = rdt['time'] == np.arange(15, 20)
    if not isinstance(comp, bool):
        return comp.all()
    return False
def test_retrieve_and_transform(self):
    # Stream definition for the CTD data
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
    ctd_stream_id, route = self.pubsub_management.create_stream('ctd stream', 'xp1', stream_definition_id=stream_def_id)

    # Stream definition for the salinity data
    salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

    ingest_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)

    #--------------------------------------------------------------------------------
    # Again with this ridiculous problem
    #--------------------------------------------------------------------------------
    self.get_datastore(dataset_id)
    self.ingestion_management.persist_data_stream(stream_id=ctd_stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['conductivity'] = np.random.randn(10) * 2 + 10

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    publisher.publish(rdt.to_granule())

    rdt['time'] = np.arange(10, 20)

    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 2)

    granule = self.data_retriever.retrieve(dataset_id,
                                           None,
                                           None,
                                           'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                           'CTDL2SalinityTransformAlgorithm',
                                           kwargs=dict(params=sal_stream_def_id))
    rdt = RecordDictionaryTool.load_from_granule(granule)
    for i in rdt['salinity']:
        self.assertNotEquals(i, 0)
def test_replay_with_parameters(self):
    #--------------------------------------------------------------------------------
    # Create the configurations and the dataset
    #--------------------------------------------------------------------------------
    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext('binary', param_type=ArrayType())
    context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext('records', param_type=RecordType())
    context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

    stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)

    stream_id, route = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)

    dataset_modified = Event()

    def cb(*args, **kwargs):
        dataset_modified.set()

    es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id)
    es.start()
    self.addCleanup(es.stop)

    self.publish_fake_data(stream_id, route)

    self.assertTrue(dataset_modified.wait(30))

    query = {
        'start_time': 0 - 2208988800,
        'end_time': 20 - 2208988800,
        'stride_time': 2,
        'parameters': ['time', 'temp']
    }
    retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

    rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
    comp = np.arange(0, 20, 2) == rdt['time']
    self.assertTrue(comp.all(), '%s' % rdt.pretty_print())
    self.assertEquals(set(rdt.iterkeys()), set(['time', 'temp']))

    extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time', 'temp'])
    self.assertTrue(extents['time'] >= 20)
    self.assertTrue(extents['temp'] >= 20)

    self.streams.append(stream_id)
    self.stop_ingestion(stream_id)
def _process_visualization_message(self, messages):
    final_hc_data = []
    final_hc_data_no_numpy = []

    for message in messages:
        if message is None:
            continue

        message_data = message.body

        if isinstance(message_data, Granule):
            rdt = RecordDictionaryTool.load_from_granule(message_data)
            hc_data_arr = get_safe(rdt, 'hc_data')

            # If this granule does not contain highcharts data, skip it
            if hc_data_arr is None:
                continue

            hc_data = hc_data_arr[0]

            for series in hc_data:
                if not series.has_key("name"):
                    continue
                series_name = series["name"]

                # Find the index in the final data. If it does not exist, create it
                final_hc_data_idx = -1
                idx = 0
                for _s in final_hc_data:
                    if _s["name"] == series_name:
                        final_hc_data_idx = idx
                        break
                    idx += 1

                # Create an entry in the final hc structure if there's none for this series
                if final_hc_data_idx == -1:
                    final_hc_data.append({})
                    final_hc_data_idx = len(final_hc_data) - 1
                    final_hc_data[final_hc_data_idx]["name"] = series_name
                    final_hc_data[final_hc_data_idx]["data"] = []

                    if series.has_key("visible"):
                        final_hc_data[final_hc_data_idx]["visible"] = series["visible"]
                    if series.has_key("tooltip"):
                        final_hc_data[final_hc_data_idx]["tooltip"] = series["tooltip"]

                # Append the series data to the final hc structure
                final_hc_data[final_hc_data_idx]["data"] += series["data"].tolist()

    return json.dumps(final_hc_data)
def test_ingestion_gap_analysis(self):
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)

    connection1 = uuid4().hex
    connection2 = uuid4().hex

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temp'] = [0]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index='0'))
    rdt['time'] = [1]
    rdt['temp'] = [1]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index='1'))
    rdt['time'] = [2]
    rdt['temp'] = [2]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection1, connection_index='3'))  # Gap, missed message
    rdt['time'] = [3]
    rdt['temp'] = [3]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index='3'))  # Gap, new connection
    rdt['time'] = [4]
    rdt['temp'] = [4]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index='4'))
    rdt['time'] = [5]
    rdt['temp'] = [5]
    self.publish_and_wait(dataset_id, rdt.to_granule(connection_id=connection2, connection_index='5'))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(6))
    np.testing.assert_array_equal(rdt['temp'], np.arange(6))

    return dataset_id
def validate_granule_subscription(self, msg, route, stream_id):
    '''
    Validation for granule format
    '''
    if msg == {}:
        return
    rdt = RecordDictionaryTool.load_from_granule(msg)
    log.info('%s', rdt.pretty_print())
    self.assertIsInstance(msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))
    self.event.set()
def setUp(self):
    DMTestCase.setUp(self)

    self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = self.ph.create_simple_qc_pdict()
    self.stream_def_id = self.pubsub_management.create_stream_definition('global range',
            parameter_dictionary_id=pdict_id,
            stream_configuration={'reference_designator': 'QCTEST'})
    self.addCleanup(self.pubsub_management.delete_stream_definition, self.stream_def_id)

    self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    self.svm = StoredValueManager(self.container)
def check_gradient(self):
    log.info('check_gradient')
    TestQCFunctions.check_gradient(self)
    self.init_check()
    self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
    self.dataset_monitor.event.wait(10)
    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
    np.testing.assert_array_equal(rdt['tempwat_gradtst_qc'], [1, 1, 0, 0, 1])
def assert_granule_time(self, granule, target_time):
    """
    Assert that the granule's time matches the target time
    @param granule The granule to be searched
    @param target_time The time that should match the granule's overall time
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    rdt_time = rdt['time'][0]
    log.debug("assert_granule_time granule time: %s", rdt_time)
    log.debug("assert_granule_time target timestamp: %s", target_time)
    self.assertEqual(rdt_time, target_time)
def test_coverage_transform(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    ingestion_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)

    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
    self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
    publisher = StandaloneStreamPublisher(stream_id, route)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_parsed_rdt(rdt)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    publisher.publish(rdt.to_granule())
    self.assertTrue(dataset_monitor.event.wait(30))

    replay_granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

    np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
    np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

    np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'], np.array([42.914]))
    np.testing.assert_array_almost_equal(rdt_out['temp_L1'], np.array([20.]))
    np.testing.assert_array_almost_equal(rdt_out['pressure_L1'], np.array([3.068]))
    np.testing.assert_array_almost_equal(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
    np.testing.assert_array_almost_equal(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))
def test_out_of_band_retrieve(self):
    # Set up the environment
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)

    # Fill the dataset
    self.publish_fake_data(stream_id, route)
    self.wait_until_we_have_enough_granules(dataset_id, 40)

    # Retrieve the data
    granule = DataRetrieverService.retrieve_oob(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    self.assertTrue((rdt['time'] == np.arange(40)).all())
def test_retrieve_and_transform(self):
    # Make a simple dataset and start ingestion, pretty standard stuff.
    ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(ctd_stream_id, dataset_id)

    # Stream definition for the salinity data
    salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['conductivity'] = np.random.randn(10) * 2 + 10
    rdt['pressure'] = np.random.randn(10) * 1 + 12

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    publisher.publish(rdt.to_granule())

    rdt['time'] = np.arange(10, 20)

    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 20)

    granule = self.data_retriever.retrieve(dataset_id,
                                           None,
                                           None,
                                           'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                           'CTDL2SalinityTransformAlgorithm',
                                           kwargs=dict(params=sal_stream_def_id))
    rdt = RecordDictionaryTool.load_from_granule(granule)
    for i in rdt['salinity']:
        self.assertNotEquals(i, 0)

    self.streams.append(ctd_stream_id)
    self.stop_ingestion(ctd_stream_id)
def _construct_streams(self, stream_info):
    decoder = IonObjectDeserializer(obj_registry=get_obj_registry())
    for (stream_name, config) in stream_info.iteritems():
        try:
            if config.has_key('stream_def_dict'):
                stream_def_dict = config['stream_def_dict']
                stream_def_dict['type_'] = 'StreamDefinition'
                stream_def_obj = decoder.deserialize(stream_def_dict)
                self._stream_defs[stream_name] = stream_def_obj
                rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
            else:
                stream_def = config['stream_definition_ref']
                self._stream_defs[stream_name] = stream_def
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            self._agent.aparam_streams[stream_name] = rdt.fields
            self._agent.aparam_pubrate[stream_name] = 0
        except Exception as e:
            errmsg = 'Instrument agent %s ' % self._agent._proc_name
            errmsg += 'error constructing stream %s. ' % stream_name
            errmsg += str(e)
            log.error(errmsg)

    self._agent.aparam_set_pubrate = self.aparam_set_pubrate
def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
    '''
    Loops until there is a sufficient amount of data in the dataset
    '''
    done = False
    with gevent.Timeout(40):
        while not done:
            extents = self.dataset_management.dataset_extents(dataset_id, 'time')
            granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
            rdt = RecordDictionaryTool.load_from_granule(granule)
            if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                done = True
            else:
                gevent.sleep(0.2)
def assert_raw_granules_ingested(self, count, payload_size):
    #--------------------------------------------------------------------------------
    # Test the slicing capabilities
    #--------------------------------------------------------------------------------
    data_retriever = DataRetrieverServiceClient()

    for i in range(0, count - 1):
        granule = data_retriever.retrieve(dataset_id=self._raw_dataset_id, query={'tdoa': slice(i, i + 1)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        log.info("Granule index: %d, time: %s, size: %s", i, rdt['time'][0], len(rdt['raw'][0]))
        self.assertEqual(payload_size, len(rdt['raw'][0]))