def test_granule(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator': "GA03FLMA-RI001-13-CTDMOG999"})
    pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    publisher = StandaloneStreamPublisher(stream_id, route)
    subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
    self.pubsub_management.activate_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['pressure'] = [20] * 10

    self.assertEquals(set(pdict.keys()), set(rdt.fields))
    self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)
    self.assertEquals(rdt._stream_config['reference_designator'], "GA03FLMA-RI001-13-CTDMOG999")

    self.rdt = rdt
    self.data_producer_id = 'data_producer'
    self.provider_metadata_update = {1: 1}

    publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1: 1}))
    self.assertTrue(self.event.wait(10))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.array([None, None, None])
    self.assertTrue(rdt['time'] is None)

    rdt['time'] = np.array([None, 1, 2])
    self.assertEquals(rdt['time'][0], rdt.fill_value('time'))

    stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
    rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)

    granule = rdt.to_granule()
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    np.testing.assert_array_equal(rdt['temp'], np.arange(20))
def test_array_flow_paths(self):
    data_product_id, stream_def_id = self.make_array_data_product()

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dm = DatasetMonitor(dataset_id)
    self.addCleanup(dm.stop)

    # I need to make sure that we can fill the RDT with its values
    # Test for one timestep
    # Test for multiple timesteps
    # Publishes
    # Ingests correctly
    # Retrieves correctly

    #--------------------------------------------------------------------------------
    # Ensure that the RDT can be filled with ArrayType values
    #--------------------------------------------------------------------------------
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temp_sample'] = [[0, 1, 2, 3, 4]]
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0, 1, 2, 3, 4]]))

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dm.event.wait(10))
    dm.event.clear()

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0, 1, 2, 3, 4]]))

    #--------------------------------------------------------------------------------
    # Ensure that it deals with multiple values
    #--------------------------------------------------------------------------------
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [1, 2, 3]
    rdt['temp_sample'] = [[0, 1, 2, 3, 4], [1], [5, 5, 5, 5, 5]]

    m = rdt.fill_value('temp_sample') or np.finfo(np.float32).max
    np.testing.assert_equal(m, np.finfo(np.float32).max)
    np.testing.assert_array_equal(rdt['temp_sample'], [[0, 1, 2, 3, 4], [1, m, m, m, m], [5, 5, 5, 5, 5]])

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dm.event.wait(10))
    dm.event.clear()

    #--------------------------------------------------------------------------------
    # Retrieve and Verify
    #--------------------------------------------------------------------------------
    retrieved_granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(retrieved_granule)
    np.testing.assert_array_equal(rdt['time'], np.array([0, 1, 2, 3]))
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [1, m, m, m, m], [5, 5, 5, 5, 5]]))
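# Illustrative sketch only (not the RecordDictionaryTool implementation): the assertions
# above rely on ragged ArrayType rows being padded out to a common width with the
# float32 maximum as the fill value. The helper name pad_ragged is hypothetical.
import numpy as np

def pad_ragged(rows, fill=np.finfo(np.float32).max):
    # Pad each short row to the width of the widest row using the fill value
    width = max(len(r) for r in rows)
    out = np.full((len(rows), width), fill, dtype=np.float64)
    for i, row in enumerate(rows):
        out[i, :len(row)] = row
    return out

m = np.finfo(np.float32).max
padded = pad_ragged([[0, 1, 2, 3, 4], [1], [5, 5, 5, 5, 5]])
expected = np.array([[0, 1, 2, 3, 4], [1, m, m, m, m], [5, 5, 5, 5, 5]], dtype=np.float64)
np.testing.assert_array_equal(padded, expected)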
def __init__(self, coverage=None, granule=None):
    if coverage is None:
        self.coverage = self.create_coverage()
        self.rdt = RecordDictionaryTool(param_dictionary=self.coverage.parameter_dictionary)
    else:
        self.coverage = coverage
        if granule is not None:
            self.sync_with_granule(granule)
        else:
            self.rdt = RecordDictionaryTool(param_dictionary=self.coverage.parameter_dictionary)
    self.pdict = self.coverage.parameter_dictionary
def test_serialize_compatability(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()

    stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
    self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
    self.pubsub_management.activate_subscription(sub_id)
    self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

    verified = Event()

    def verifier(msg, route, stream_id):
        for k, v in msg.record_dictionary.iteritems():
            if v is not None:
                self.assertIsInstance(v, np.ndarray)
        rdt = RecordDictionaryTool.load_from_granule(msg)
        for k, v in rdt.iteritems():
            self.assertIsInstance(rdt[k], np.ndarray)
            self.assertIsInstance(v, np.ndarray)
        verified.set()

    subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    publisher = StandaloneStreamPublisher(stream_id, route)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    ph.fill_rdt(rdt, 10)
    publisher.publish(rdt.to_granule())
    self.assertTrue(verified.wait(60))
def execute_retrieve(self):
    '''
    execute_retrieve Executes a retrieval and returns the result
    as a value in lieu of publishing it on a stream
    '''
    coverage = None
    try:
        coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
        if coverage.num_timesteps == 0:
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = self._coverage_to_granule(coverage=coverage,
                                            start_time=self.start_time,
                                            end_time=self.end_time,
                                            stride_time=self.stride_time,
                                            parameters=self.parameters,
                                            tdoa=self.tdoa)
    except:
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    finally:
        # Guard the close: _get_coverage may have raised before coverage was assigned
        if coverage is not None:
            coverage.close(timeout=5)
    return rdt.to_granule()
def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
    query = query or {}
    coverage = None
    try:
        coverage = cls._get_coverage(dataset_id)
        if coverage is None:
            raise BadRequest('no such coverage')
        if coverage.num_timesteps == 0:
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = ReplayProcess._coverage_to_granule(coverage=coverage,
                                                     start_time=query.get('start_time', None),
                                                     end_time=query.get('end_time', None),
                                                     stride_time=query.get('stride_time', None),
                                                     parameters=query.get('parameters', None),
                                                     stream_def_id=delivery_format,
                                                     tdoa=query.get('tdoa', None))
    except:
        cls._eject_cache(dataset_id)
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    return rdt.to_granule()
def test_coefficient_compatibility(self):
    data_product_id = self.create_data_product(name='Calibration Coefficient Test Data product', stream_def_id=self.stream_def_id)
    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = [10] * 10
    rdt['cc_coefficient'] = [2] * 10

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())

    rdt2 = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
    np.testing.assert_array_equal(rdt2['offset'], [12] * 10)
def test_execute_advanced_transform(self):
    # Runs a transform across L0-L2 with stream definitions including available fields
    streams = self.setup_advanced_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_defs_id = streams[1]

    validation_event = Event()

    def validator(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['rho'], np.array([1001.0055034])):
            return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)

    outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]

    outbound_granule = outbound_rdt.to_granule()

    publisher.publish(outbound_granule)

    self.assertTrue(validation_event.wait(2))
def create_lookup_rdt(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_lookups()

    stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator': "GA03FLMA-RI001-13-CTDMOG999"})
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    return rdt
def create_rdt(self):
    contexts, pfuncs = self.create_pfuncs()
    context_ids = list(contexts.itervalues())

    pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='test_TIME')
    self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

    stream_def_id = self.pubsub_management.create_stream_definition('functional', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    return rdt
def populate_vectors(self, stream_def_id, hours, temp_vector):
    now = time.time()
    ntp_now = now + 2208988800

    for i in xrange(hours):
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        st = ntp_now - (3600 * (hours - i))
        et = ntp_now - (3600 * (hours - (i + 1)))
        rdt['time'] = np.arange(st, et)
        rdt['temp'] = temp_vector(3600)
        yield rdt
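# Background sketch: 2208988800 is the number of seconds between the NTP epoch
# (1900-01-01) and the Unix epoch (1970-01-01), so adding it converts a Unix
# timestamp from time.time() into the NTP-based timestamps used for 'time' above.
from datetime import datetime

NTP_EPOCH = datetime(1900, 1, 1)
UNIX_EPOCH = datetime(1970, 1, 1)
NTP_DELTA = (UNIX_EPOCH - NTP_EPOCH).total_seconds()
assert NTP_DELTA == 2208988800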
def get_last_values(cls, dataset_id, number_of_points):
    coverage = DatasetManagementService._get_coverage(dataset_id, mode='r')
    if coverage.num_timesteps < number_of_points:
        if coverage.num_timesteps == 0:
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
            return rdt.to_granule()
        number_of_points = coverage.num_timesteps

    rdt = cls._coverage_to_granule(coverage, tdoa=slice(-number_of_points, None))
    coverage.close(timeout=5)
    return rdt.to_granule()
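# Sketch of the tail slice used above: slice(-n, None) selects the last n records,
# which is how the most recent values are pulled out of the coverage. Standalone
# numpy illustration, not the coverage API itself.
import numpy as np

values = np.arange(100)
last_ten = values[slice(-10, None)]          # equivalent to values[-10:]
np.testing.assert_array_equal(last_ten, np.arange(90, 100))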
def write_to_data_product(self, data_product_id):
    dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
    dataset_id = dataset_ids.pop()

    stream_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
    stream_id = stream_ids.pop()

    stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
    stream_def_id = stream_def_ids.pop()

    route = self.pubsub_management.read_stream_route(stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    time_param = rdt._pdict.temporal_parameter_name
    if time_param is None:
        print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name
        return

    rdt[time_param] = np.arange(40)
    for field in rdt.fields:
        if field == rdt._pdict.temporal_parameter_name:
            continue
        rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type, 40)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 40)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(granule)

    bad = []
    for field in rdt.fields:
        if not np.array_equal(rdt[field], rdt_out[field]):
            print '%s' % field
            print '%s != %s' % (rdt[field], rdt_out[field])
            bad.append(field)
    return bad
def test_array_visualization(self):
    data_product_id, stream_def_id = self.make_array_data_product()

    # Make a granule with an array type, give it a few values
    # Send it to google_dt transform, verify output
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(2208988800, 2208988810)
    rdt['temp_sample'] = np.arange(10 * 4).reshape(10, 4)
    rdt['cond_sample'] = np.arange(10 * 4).reshape(10, 4)
    granule = rdt.to_granule()

    dataset_monitor = DatasetMonitor(self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id))
    self.addCleanup(dataset_monitor.stop)

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    dataset_monitor.event.wait(10)

    gdt_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('google_dt', id_only=True)
    gdt_stream_def = self.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

    gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(granule, params=gdt_stream_def)

    rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
    testval = {'data_content': [
                   [0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 4.0, 6.0, 0.0, 1.0, 2.0, 3.0],
                   [1.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 12.0, 14.0, 4.0, 5.0, 6.0, 7.0],
                   [2.0, 8.0, 9.0, 10.0, 11.0, 16.0, 18.0, 20.0, 22.0, 8.0, 9.0, 10.0, 11.0],
                   [3.0, 12.0, 13.0, 14.0, 15.0, 24.0, 26.0, 28.0, 30.0, 12.0, 13.0, 14.0, 15.0],
                   [4.0, 16.0, 17.0, 18.0, 19.0, 32.0, 34.0, 36.0, 38.0, 16.0, 17.0, 18.0, 19.0],
                   [5.0, 20.0, 21.0, 22.0, 23.0, 40.0, 42.0, 44.0, 46.0, 20.0, 21.0, 22.0, 23.0],
                   [6.0, 24.0, 25.0, 26.0, 27.0, 48.0, 50.0, 52.0, 54.0, 24.0, 25.0, 26.0, 27.0],
                   [7.0, 28.0, 29.0, 30.0, 31.0, 56.0, 58.0, 60.0, 62.0, 28.0, 29.0, 30.0, 31.0],
                   [8.0, 32.0, 33.0, 34.0, 35.0, 64.0, 66.0, 68.0, 70.0, 32.0, 33.0, 34.0, 35.0],
                   [9.0, 36.0, 37.0, 38.0, 39.0, 72.0, 74.0, 76.0, 78.0, 36.0, 37.0, 38.0, 39.0]],
               'data_description': [
                   ('time', 'number', 'time'),
                   ('temp_sample[0]', 'number', 'temp_sample[0]', {'precision': '5'}),
                   ('temp_sample[1]', 'number', 'temp_sample[1]', {'precision': '5'}),
                   ('temp_sample[2]', 'number', 'temp_sample[2]', {'precision': '5'}),
                   ('temp_sample[3]', 'number', 'temp_sample[3]', {'precision': '5'}),
                   ('temp_offset[0]', 'number', 'temp_offset[0]', {'precision': '5'}),
                   ('temp_offset[1]', 'number', 'temp_offset[1]', {'precision': '5'}),
                   ('temp_offset[2]', 'number', 'temp_offset[2]', {'precision': '5'}),
                   ('temp_offset[3]', 'number', 'temp_offset[3]', {'precision': '5'}),
                   ('cond_sample[0]', 'number', 'cond_sample[0]', {'precision': '5'}),
                   ('cond_sample[1]', 'number', 'cond_sample[1]', {'precision': '5'}),
                   ('cond_sample[2]', 'number', 'cond_sample[2]', {'precision': '5'}),
                   ('cond_sample[3]', 'number', 'cond_sample[3]', {'precision': '5'})],
               'viz_product_type': 'google_dt'}
    self.assertEquals(rdt['google_dt_components'][0], testval)
def _replay(self):
    coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
    rdt = self._cov2granule(coverage=coverage,
                            start_time=self.start_time,
                            end_time=self.end_time,
                            stride_time=self.stride_time,
                            parameters=self.parameters,
                            stream_def_id=self.stream_def_id)
    elements = len(rdt)

    for i in xrange(elements / self.publish_limit):
        outgoing = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        fields = self.parameters or outgoing.fields
        for field in fields:
            v = rdt[field]
            if v is not None:
                outgoing[field] = v[(i * self.publish_limit):((i + 1) * self.publish_limit)]
        yield outgoing
    coverage.close(timeout=5)
    return
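# Sketch of the batching above (standalone illustration, not the replay process itself):
# each outgoing granule carries one contiguous window of publish_limit records. Note
# that with Python 2 integer division, a trailing partial window is not emitted.
publish_limit = 5
elements = 20
windows = [(i * publish_limit, (i + 1) * publish_limit) for i in range(elements // publish_limit)]
assert windows == [(0, 5), (5, 10), (10, 15), (15, 20)]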
def _data_dict_to_rdt(cls, data_dict, stream_def_id, coverage):
    if stream_def_id:
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    else:
        rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)

    if not data_dict:
        log.warning('Retrieve returning empty set')
        return rdt
    if 'time' in data_dict and data_dict['time'].shape[0] == 0:
        log.warning('Retrieve returning empty set')
        return rdt

    rdt[coverage.temporal_parameter_name] = data_dict[coverage.temporal_parameter_name]
    for field in rdt.fields:
        if field == coverage.temporal_parameter_name:
            continue
        # The values have already been inside a coverage so we know they're safe
        # and they exist, so they can be inserted directly.
        if field in data_dict:
            rdt._rd[field] = data_dict[field]
            #rdt[k] = v
    return rdt
def test_filter(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
    self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)

    rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
    self.assertEquals(rdt._available_fields, ['time', 'temp'])

    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    with self.assertRaises(KeyError):
        rdt['pressure'] = np.arange(20)

    granule = rdt.to_granule()
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    self.assertEquals(rdt._available_fields, rdt2._available_fields)
    self.assertEquals(rdt.fields, rdt2.fields)
    for k, v in rdt.iteritems():
        self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
def rdt_to_granule(self, context, value_array, comp_val=None):
    pdict = ParameterDictionary()
    pdict.add_context(context)

    rdt = RecordDictionaryTool(param_dictionary=pdict)
    rdt['test'] = value_array

    granule = rdt.to_granule()
    rdt2 = RecordDictionaryTool.load_from_granule(granule)

    testval = comp_val if comp_val is not None else value_array
    actual = rdt2['test']

    if isinstance(testval, basestring):
        self.assertEquals(testval, actual)
    else:
        np.testing.assert_array_equal(testval, actual)
def check_presta_instrument_data_products(self, reference_designator):
    # Check the parsed data product, make sure it's got everything it needs and can be published, persisted, etc.
    # Absolute Pressure (SFLPRES_L0) is what comes off the instrument, SFLPRES_L1 is a pfunc
    # Let's go ahead and publish some fake data!!!
    # According to https://alfresco.oceanobservatories.org/alfresco/d/d/workspace/SpacesStore/63e16865-9d9e-4b11-b0b3-d5658faa5080/1341-00230_Data_Product_Spec_SFLPRES_OOI.pdf
    # Appendix A. Example 1.
    # p_psia_tide = 14.8670
    # the tide should be 10.2504
    passing = True

    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    now = time.time()
    ntp_now = now + 2208988800.

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['absolute_pressure'] = [14.8670]
    passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())  # Bumped to 20 to keep buildbot happy
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
    passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
    passing &= self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)

    return passing
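# Hedged arithmetic check of the expected value above: the DPS example cited in the
# comments treats the L1 seafloor pressure as the absolute pressure converted from
# psia to dbar (1 psi ~= 0.689476 dbar), so 14.8670 psia ~= 10.2504 dbar. This is an
# illustration of where the number comes from, not the parameter function itself.
PSI_TO_DBAR = 0.689475729
assert round(14.8670 * PSI_TO_DBAR, 4) == 10.2504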
def rdt_to_granule(self, context, value_array, comp_val=None):
    time = ParameterContext(name='time', param_type=QuantityType(value_encoding=np.float64))

    pdict = ParameterDictionary()
    pdict.add_context(time, is_temporal=True)
    pdict.add_context(context)

    rdt = RecordDictionaryTool(param_dictionary=pdict)
    rdt['time'] = np.arange(len(value_array))
    rdt['test'] = value_array

    granule = rdt.to_granule()
    rdt2 = RecordDictionaryTool.load_from_granule(granule)

    testval = comp_val if comp_val is not None else value_array
    actual = rdt2['test']

    if isinstance(testval, basestring):
        self.assertEquals(testval, actual)
    else:
        np.testing.assert_array_equal(testval, actual)
def get_last_values(cls, dataset_id, number_of_points=100, delivery_format=''):
    stream_def_id = delivery_format
    cov = None
    try:
        cov = DatasetManagementService._get_coverage(dataset_id, mode='r')
        if cov.is_empty():
            rdt = RecordDictionaryTool(param_dictionary=cov.parameter_dictionary)
        else:
            time_array = cov.get_parameter_values([cov.temporal_parameter_name], sort_parameter=cov.temporal_parameter_name).get_data()
            time_array = time_array[cov.temporal_parameter_name][-number_of_points:]
            t0 = np.asscalar(time_array[0])
            t1 = np.asscalar(time_array[-1])
            data_dict = cov.get_parameter_values(time_segment=(t0, t1), fill_empty_params=True).get_data()
            rdt = cls._data_dict_to_rdt(data_dict, stream_def_id, cov)
    except:
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    finally:
        # Guard the close: _get_coverage may have raised before cov was assigned
        if cov is not None:
            cov.close(timeout=5)
    return rdt
def test_execute_transform(self):
    streams = self.setup_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_def_id = streams[1]

    validation_event = Event()

    def validator(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
            return
        if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
            return
        if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
            return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)

    outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]

    outbound_granule = outbound_rdt.to_granule()

    publisher.publish(outbound_granule)

    self.assertTrue(validation_event.wait(2))
def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
    query = query or {}
    coverage = None
    try:
        coverage = cls._get_coverage(dataset_id)
        if coverage is None:
            raise BadRequest('no such coverage')
        if isinstance(coverage, SimplexCoverage) and coverage.is_empty():
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            args = {
                'start_time': query.get('start_time', None),
                'end_time': query.get('end_time', None),
                'stride_time': query.get('stride_time', None),
                'parameters': query.get('parameters', None),
                'stream_def_id': delivery_format,
                'tdoa': query.get('tdoa', None),
                'sort_parameter': query.get('sort_parameter', None)
            }
            rdt = ReplayProcess._cov2granule(coverage=coverage, **args)
    except Exception as e:
        cls._eject_cache(dataset_id)
        data_products, _ = Container.instance.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
        for data_product in data_products:
            log.error("Data Product %s (%s) had issues reading from the coverage model\nretrieve_oob(dataset_id='%s', query=%s, delivery_format=%s)",
                      data_product.name, data_product._id, dataset_id, query, delivery_format)
        log.error("Problems reading from the coverage", exc_info=True)
        raise BadRequest('Problems reading from the coverage')
    return rdt.to_granule()
def check_tempsf_instrument_data_product(self, reference_designator):
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    now = time.time()
    ntp_now = now + 2208988800

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['temperature'] = [[25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
                           29.4434, 26.9873, 15.2883, 16.3374, 14.5883,
                           15.7253, 18.4383, 15.3488, 17.2993, 10.2111,
                           11.5993, 10.9345, 9.4444, 9.9876, 10.9834,
                           11.0098, 5.3456, 4.2994, 4.3009]]

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.wait())
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
    passing &= self.assert_array_almost_equal(rdt['temperature'], [[25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
                                                                    29.4434, 26.9873, 15.2883, 16.3374, 14.5883,
                                                                    15.7253, 18.4383, 15.3488, 17.2993, 10.2111,
                                                                    11.5993, 10.9345, 9.4444, 9.9876, 10.9834,
                                                                    11.0098, 5.3456, 4.2994, 4.3009]])
    return passing
def test_granule(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id)
    pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)
    self.xps.append('xp1')

    publisher = StandaloneStreamPublisher(stream_id, route)

    subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
    subscriber.start()

    subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
    self.xns.append('sub')
    self.pubsub_management.activate_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['pressure'] = [20] * 10

    self.assertEquals(set(pdict.keys()), set(rdt.fields))
    self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

    self.rdt = rdt
    self.data_producer_id = 'data_producer'
    self.provider_metadata_update = {1: 1}

    publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1: 1}))

    self.assertTrue(self.event.wait(10))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)

    filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
    self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)

    rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
    self.assertEquals(rdt._available_fields, ['time', 'temp'])

    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    with self.assertRaises(KeyError):
        rdt['pressure'] = np.arange(20)

    granule = rdt.to_granule()
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    self.assertEquals(rdt._available_fields, rdt2._available_fields)
    self.assertEquals(rdt.fields, rdt2.fields)
    for k, v in rdt.iteritems():
        self.assertTrue(np.array_equal(rdt[k], rdt2[k]))

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.array([None, None, None])
    self.assertTrue(rdt['time'] is None)

    rdt['time'] = np.array([None, 1, 2])
    self.assertEquals(rdt['time'][0], rdt.fill_value('time'))
def check_vel3d_instrument_data_products(self, reference_designator):
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(stream_def_id)
    self.assertEquals(pdict.name, 'vel3d_b_sample')

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    lat = 14.6846
    lon = -51.044
    ts = np.array([3319563600, 3319567200, 3319570800, 3319574400, 3319578000,
                   3319581600, 3319585200, 3319588800, 3319592400, 3319596000], dtype=np.float)

    ve = np.array([-3.2, 0.1, 0., 2.3, -0.1, 5.6, 5.1, 5.8, 8.8, 10.3])
    vn = np.array([18.2, 9.9, 12., 6.6, 7.4, 3.4, -2.6, 0.2, -1.5, 4.1])
    vu = np.array([-1.1, -0.6, -1.4, -2, -1.7, -2, 1.3, -1.6, -1.1, -4.5])

    ve_expected = np.array([-0.085136, -0.028752, -0.036007, 0.002136, -0.023158,
                            0.043218, 0.056451, 0.054727, 0.088446, 0.085952])
    vn_expected = np.array([0.164012, 0.094738, 0.114471, 0.06986, 0.07029,
                            0.049237, -0.009499, 0.019311, 0.012096, 0.070017])
    vu_expected = np.array([-0.011, -0.006, -0.014, -0.02, -0.017,
                            -0.02, 0.013, -0.016, -0.011, -0.045])

    rdt['time'] = ts
    rdt['lat'] = [lat] * 10
    rdt['lon'] = [lon] * 10
    rdt['turbulent_velocity_east'] = ve
    rdt['turbulent_velocity_north'] = vn
    rdt['turbulent_velocity_up'] = vu

    passing &= self.assert_array_almost_equal(rdt['eastward_turbulent_velocity'], ve_expected)
    passing &= self.assert_array_almost_equal(rdt['northward_turbulent_velocity'], vn_expected)
    passing &= self.assert_array_almost_equal(rdt['upward_turbulent_velocity'], vu_expected)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.wait())
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['eastward_turbulent_velocity'], ve_expected)
    passing &= self.assert_array_almost_equal(rdt['northward_turbulent_velocity'], vn_expected)
    passing &= self.assert_array_almost_equal(rdt['upward_turbulent_velocity'], vu_expected)
    return passing
def check_trhph_instrument_data_products(self, reference_designator):
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(stream_def_id)
    passing &= self.assertEquals(pdict.name, 'trhph_sample')

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

    # calibration constants
    a = 1.98e-9
    b = -2.45e-6
    c = 9.28e-4
    d = -0.0888
    e = 0.731

    V_s = 1.506
    V_c = 0.
    T = 11.8

    r1 = 0.906
    r2 = 4.095
    r3 = 4.095

    ORP_V = 1.806
    Cl = np.nan

    offset = 2008
    gain = 4.0
    # Normally this would be 50 per the DPS but the precision is %4.0f which
    # truncates the values to the nearest 1...
    ORP = ((ORP_V * 1000.) - offset) / gain

    ntp_now = time.time() + 2208988800

    rdt['cc_a'] = [a]
    rdt['cc_b'] = [b]
    rdt['cc_c'] = [c]
    rdt['cc_d'] = [d]
    rdt['cc_e'] = [e]
    rdt['ref_temp_volts'] = [V_s]
    rdt['resistivity_temp_volts'] = [V_c]
    rdt['eh_sensor'] = [ORP_V]
    rdt['resistivity_5'] = [r1]
    rdt['resistivity_x1'] = [r2]
    rdt['resistivity_x5'] = [r3]
    rdt['cc_offset'] = [offset]
    rdt['cc_gain'] = [gain]
    rdt['time'] = [ntp_now]

    passing &= self.assert_array_almost_equal(rdt['vent_fluid_temperaure'], [T], 2)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_chloride_conc'], [Cl], 4)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_orp'], [ORP], 4)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.wait())
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_temperaure'], [T], 2)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_chloride_conc'], [Cl], 4)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_orp'], [ORP], 4)
    return passing
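# Quick arithmetic check of the ORP expectation above: with an offset of 2008 mV and
# a gain of 4, an Eh sensor reading of 1.806 V works out to (1806 - 2008) / 4 = -50.5 mV.
# This just restates the formula already used in the test; it is not the parameter function.
orp_check = ((1.806 * 1000.) - 2008) / 4.0
assert abs(orp_check - (-50.5)) < 1e-9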
def _coverage_to_granule(cls, coverage, start_time=None, end_time=None, stride_time=None, fuzzy_stride=True, parameters=None, stream_def_id=None, tdoa=None):
    slice_ = slice(None)  # Defaults to all values

    # Validations
    if start_time is not None:
        validate_is_instance(start_time, Number, 'start_time must be a number for striding.')
    if end_time is not None:
        validate_is_instance(end_time, Number, 'end_time must be a number for striding.')
    if stride_time is not None:
        validate_is_instance(stride_time, Number, 'stride_time must be a number for striding.')

    if tdoa is not None and isinstance(tdoa, slice):
        slice_ = tdoa

    elif stride_time is not None and not fuzzy_stride:  # SLOW
        ugly_range = np.arange(start_time, end_time, stride_time)
        idx_values = [cls.get_time_idx(coverage, i) for i in ugly_range]
        idx_values = list(set(idx_values))  # Removing duplicates - also mixes the order of the list!!!
        idx_values.sort()
        slice_ = [idx_values]

    elif not (start_time is None and end_time is None):
        if start_time is not None:
            start_time = cls.get_time_idx(coverage, start_time)
        if end_time is not None:
            end_time = cls.get_time_idx(coverage, end_time)
        slice_ = slice(start_time, end_time, stride_time)
        log.info('Slice: %s', slice_)

    if stream_def_id:
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    else:
        rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)

    if parameters is not None:
        # TODO: Improve efficiency here
        fields = list(set(parameters).intersection(rdt.fields))
    else:
        fields = rdt.fields

    if slice_.start == slice_.stop and slice_.start is not None:
        log.warning('Requested empty set of data. %s', slice_)
        return rdt

    # Do time first
    tname = coverage.temporal_parameter_name
    cls.map_cov_rdt(coverage, rdt, tname, slice_)

    for field in fields:
        if field == tname:
            continue
        cls.map_cov_rdt(coverage, rdt, field, slice_)
    return rdt
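# Sketch of the non-fuzzy stride path above (assumed behavior, standalone illustration,
# not the coverage API): candidate timestamps are generated with np.arange, mapped to
# indices, de-duplicated with set() (which loses ordering), and re-sorted before use.
# The int() call below is a stand-in for get_time_idx().
import numpy as np

ugly_range = np.arange(0.0, 10.0, 2.5)      # candidate times: 0.0, 2.5, 5.0, 7.5
idx_values = [int(t) for t in ugly_range]   # hypothetical time-to-index mapping
idx_values = sorted(set(idx_values))        # dedupe, then restore ascending order
assert idx_values == [0, 2, 5, 7]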
def test_rdt_param_funcs(self):
    param_funcs = {
        'identity': {
            'function_type': PFT.PYTHON,
            'owner': 'ion_functions.data.interpolation',
            'function': 'identity',
            'args': ['x']
        },
        'ctd_tempwat': {
            'function_type': PFT.PYTHON,
            'owner': 'ion_functions.data.ctd_functions',
            'function': 'ctd_sbe37im_tempwat',
            'args': ['t0']
        },
        'ctd_preswat': {
            'function_type': PFT.PYTHON,
            'owner': 'ion_functions.data.ctd_functions',
            'function': 'ctd_sbe37im_preswat',
            'args': ['p0', 'p_range_psia']
        },
        'ctd_condwat': {
            'function_type': PFT.PYTHON,
            'owner': 'ion_functions.data.ctd_functions',
            'function': 'ctd_sbe37im_condwat',
            'args': ['c0']
        },
        'ctd_pracsal': {
            'function_type': PFT.PYTHON,
            'owner': 'ion_functions.data.ctd_functions',
            'function': 'ctd_pracsal',
            'args': ['c', 't', 'p']
        },
        'ctd_density': {
            'function_type': PFT.PYTHON,
            'owner': 'ion_functions.data.ctd_functions',
            'function': 'ctd_density',
            'args': ['SP', 't', 'p', 'lat', 'lon']
        }
    }

    pfunc_ids = {}
    for name, param_def in param_funcs.iteritems():
        paramfunc = ParameterFunction(name, **param_def)
        pf_id = self.dataset_management.create_parameter_function(paramfunc)
        pfunc_ids[name] = pf_id

    params = {
        'time': {
            'parameter_type': 'quantity',
            'value_encoding': 'float64',
            'units': 'seconds since 1900-01-01'
        },
        'temperature_counts': {
            'parameter_type': 'quantity',
            'value_encoding': 'float32',
            'units': '1'
        },
        'pressure_counts': {
            'parameter_type': 'quantity',
            'value_encoding': 'float32',
            'units': '1'
        },
        'conductivity_counts': {
            'parameter_type': 'quantity',
            'value_encoding': 'float32',
            'units': '1'
        },
        'temperature': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_tempwat'],
            'parameter_function_map': {'t0': 'temperature_counts'},
            'value_encoding': 'float32',
            'units': 'deg_C'
        },
        'pressure': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_preswat'],
            'parameter_function_map': {'p0': 'pressure_counts', 'p_range_psia': 679.34040721},
            'value_encoding': 'float32',
            'units': 'dbar'
        },
        'conductivity': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_condwat'],
            'parameter_function_map': {'c0': 'conductivity_counts'},
            'value_encoding': 'float32',
            'units': 'Sm-1'
        },
        'salinity': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_pracsal'],
            'parameter_function_map': {'c': 'conductivity', 't': 'temperature', 'p': 'pressure'},
            'value_encoding': 'float32',
            'units': '1'
        },
        'density': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_density'],
            'parameter_function_map': {'SP': 'salinity', 't': 'temperature', 'p': 'pressure', 'lat': 'lat', 'lon': 'lon'},
            'value_encoding': 'float32',
            'units': 'kg m-1'
        },
        'lat': {
            'parameter_type': 'sparse',
            'value_encoding': 'float32',
            'units': 'degrees_north'
        },
        'lon': {
            'parameter_type': 'sparse',
            'value_encoding': 'float32',
            'units': 'degrees_east'
        }
    }

    param_dict = {}
    for name, param in params.iteritems():
        pcontext = ParameterContext(name, **param)
        param_id = self.dataset_management.create_parameter(pcontext)
        param_dict[name] = param_id

    pdict_id = self.dataset_management.create_parameter_dictionary('ctd_test', param_dict.values(), 'time')
    stream_def_id = self.pubsub_management.create_stream_definition('ctd_test', parameter_dictionary_id=pdict_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temperature_counts'] = [280000]
    rdt['conductivity_counts'] = [100000]
    rdt['pressure_counts'] = [2789]
    rdt['lat'] = [45]
    rdt['lon'] = [-71]

    np.testing.assert_allclose(rdt['density'], np.array([1001.00543606]))
def sync_rdt_with_coverage(self, coverage=None, tdoa=None, start_time=None, end_time=None, stride_time=None, parameters=None):
    '''
    Builds a granule based on the coverage
    '''
    if coverage is None:
        coverage = self.coverage

    slice_ = slice(None)  # Defaults to all values
    if tdoa is not None and isinstance(tdoa, slice):
        slice_ = tdoa

    elif stride_time is not None:
        validate_is_instance(start_time, Number, 'start_time must be a number for striding.')
        validate_is_instance(end_time, Number, 'end_time must be a number for striding.')
        validate_is_instance(stride_time, Number, 'stride_time must be a number for striding.')
        ugly_range = np.arange(start_time, end_time, stride_time)
        idx_values = [TimeUtils.get_relative_time(coverage, i) for i in ugly_range]
        slice_ = [idx_values]

    elif not (start_time is None and end_time is None):
        time_var = coverage._temporal_param_name
        uom = coverage.get_parameter_context(time_var).uom
        if start_time is not None:
            start_units = TimeUtils.ts_to_units(uom, start_time)
            log.info('Units: %s', start_units)
            start_idx = TimeUtils.get_relative_time(coverage, start_units)
            log.info('Start Index: %s', start_idx)
            start_time = start_idx
        if end_time is not None:
            end_units = TimeUtils.ts_to_units(uom, end_time)
            log.info('End units: %s', end_units)
            end_idx = TimeUtils.get_relative_time(coverage, end_units)
            log.info('End index: %s', end_idx)
            end_time = end_idx
        slice_ = slice(start_time, end_time, stride_time)
        log.info('Slice: %s', slice_)

    if parameters is not None:
        pdict = ParameterDictionary()
        params = set(coverage.list_parameters()).intersection(parameters)
        for param in params:
            pdict.add_context(coverage.get_parameter_context(param))
        rdt = RecordDictionaryTool(param_dictionary=pdict)
        self.pdict = pdict
    else:
        rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)

    fields = coverage.list_parameters()
    if parameters is not None:
        fields = set(fields).intersection(parameters)

    for d in fields:
        rdt[d] = coverage.get_parameter_values(d, tdoa=slice_)
    self.rdt = rdt  # Sync