def test_coefficient_compatibility(self):
    # Verifies that a published calibration coefficient is picked up by a
    # derived parameter: with temp=10 and cc_coefficient=2 the retrieved
    # 'offset' parameter must be 12 for every sample.
    # (offset is presumably computed as temp + cc_coefficient — TODO confirm
    # against the parameter function definition.)
    data_product_id = self.create_data_product(
        name='Calibration Coefficient Test Data product',
        stream_def_id=self.stream_def_id)
    self.data_product_management.activate_data_product_persistence(
        data_product_id)
    self.addCleanup(
        self.data_product_management.suspend_data_product_persistence,
        data_product_id)

    # Build one granule of ten samples with a constant coefficient.
    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = [10] * 10
    rdt['cc_coefficient'] = [2] * 10

    # Monitor the dataset so we know when ingestion has completed.
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
        data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())

    # Retrieve and check the derived value.
    rdt2 = RecordDictionaryTool.load_from_granule(
        self.data_retriever.retrieve(dataset_id))
    np.testing.assert_array_equal(rdt2['offset'], [12] * 10)
def execute_retrieve(self):
    '''
    execute_retrieve
    Executes a retrieval and returns the result as a value in lieu of publishing it on a stream

    Raises BadRequest when the coverage cannot be read.
    '''
    # Initialized so the finally clause is safe when _get_coverage itself
    # raises; previously `coverage` was unbound in that case and the
    # finally block raised NameError, masking the real BadRequest.
    coverage = None
    try:
        coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
        if coverage.num_timesteps == 0:
            # Nothing ingested yet: still return a (parameter-complete,
            # value-empty) granule rather than failing.
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(
                param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = self._coverage_to_granule(coverage=coverage,
                                            start_time=self.start_time,
                                            end_time=self.end_time,
                                            stride_time=self.stride_time,
                                            parameters=self.parameters,
                                            tdoa=self.tdoa)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # (and greenlet kills) propagate instead of becoming BadRequest.
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    finally:
        if coverage is not None:
            coverage.close(timeout=5)
    return rdt.to_granule()
def test_execute_advanced_transform(self):
    # Runs a transform across L0-L2 with stream definitions including available fields
    # Publishes raw L0 values and asserts the transform's output granule
    # carries the expected derived density ('rho').
    streams = self.setup_advanced_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_defs_id = streams[1]

    validation_event = Event()

    def validator(msg, route, stream_id):
        # Only set the event when rho matches the expected density value;
        # a mismatch simply leaves the event unset and the test times out.
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['rho'], np.array([1001.0055034])):
            return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)

    # One sample of raw (L0) counts plus position.
    outbound_rdt = RecordDictionaryTool(
        stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]

    outbound_granule = outbound_rdt.to_granule()

    publisher.publish(outbound_granule)

    self.assertTrue(validation_event.wait(2))
def test_serialize_compatability(self):
    # Round-trips an extended-parsed granule through pub/sub and asserts
    # every record-dictionary value deserializes as a numpy ndarray.
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()

    stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
    self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
    self.pubsub_management.activate_subscription(sub_id)
    self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

    verified = Event()

    def verifier(msg, route, stream_id):
        # Check both the raw message payload and the reloaded RDT.
        for k, v in msg.record_dictionary.iteritems():
            if v is not None:
                self.assertIsInstance(v, np.ndarray)
        rdt = RecordDictionaryTool.load_from_granule(msg)
        for k, v in rdt.iteritems():
            self.assertIsInstance(rdt[k], np.ndarray)
            self.assertIsInstance(v, np.ndarray)
        verified.set()

    subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    publisher = StandaloneStreamPublisher(stream_id, route)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    ph.fill_rdt(rdt, 10)
    publisher.publish(rdt.to_granule())
    self.assertTrue(verified.wait(60))
def test_execute_advanced_transform(self):
    # Runs a transform across L0-L2 with stream definitions including available fields
    # NOTE(review): this appears to be a duplicate of another
    # test_execute_advanced_transform in this file — confirm which copy is live.
    streams = self.setup_advanced_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_defs_id = streams[1]

    validation_event = Event()

    def validator(msg, route, stream_id):
        # Set the event only when the derived density matches expectation.
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['rho'], np.array([1001.0055034])):
            return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)

    # One sample of raw L0 counts plus position.
    outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]

    outbound_granule = outbound_rdt.to_granule()

    publisher.publish(outbound_granule)

    self.assertTrue(validation_event.wait(2))
def test_execute_transform(self):
    # Spawns a TransformPrime process, feeds it one L0 granule directly via
    # _execute_transform, and validates the derived L1/L2 output fields.
    available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
    available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1','CONDWAT_L1','PRESWAT_L1','PRACSAL', 'DENSITY']
    exchange_pt1 = 'xp1'
    exchange_pt2 = 'xp2'
    stream_id_in,stream_id_out,stream_route_in,stream_route_out,stream_def_in_id,stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

    # Build a 20-sample input granule with fixed position and synthetic L0 values.
    rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
    dt = 20
    rdt_in['time'] = np.arange(dt)
    rdt_in['lat'] = [40.992469] * dt
    rdt_in['lon'] = [-71.727069] * dt
    rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
    rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
    rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
    msg = rdt_in.to_granule()
    #pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',{'process':{'routes':{(stream_id_in, stream_id_out):None},'stream_id':stream_id_out}})
    config = {'process':{'routes':{(stream_id_in, stream_id_out):None},'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
    pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',config)

    # Invoke the transform synchronously instead of going through pub/sub.
    rdt_out = self.container.proc_manager.procs[pid]._execute_transform(msg, (stream_id_in,stream_id_out))
    #need below to wrap result in a param val object
    rdt_out = RecordDictionaryTool.load_from_granule(rdt_out.to_granule())
    # Every output field must have one value per input timestep.
    for k,v in rdt_out.iteritems():
        self.assertEqual(len(v), dt)
    self._validate_transforms(rdt_in, rdt_out)
    self.container.proc_manager.terminate_process(pid)
def publish_to_data_product(self, data_product_id):
    # Publishes one synthetic CTD record onto the data product's stream.
    stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
    self.assertTrue(len(stream_ids))
    stream_id = stream_ids.pop()
    route = self.pubsub_management.read_stream_route(stream_id)
    stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_definition._id
    publisher = StandaloneStreamPublisher(stream_id, route)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    now = time.time()
    # Unix epoch -> NTP epoch offset (seconds between 1900 and 1970).
    ntp_now = now + 2208988800 # Do not use in production, this is a loose translation

    rdt['internal_timestamp'] = [ntp_now]
    rdt['temp'] = [300000]
    rdt['preferred_timestamp'] = ['driver_timestamp']
    rdt['time'] = [ntp_now]
    rdt['port_timestamp'] = [ntp_now]
    rdt['quality_flag'] = [None]
    rdt['lat'] = [45]
    rdt['conductivity'] = [4341400]
    rdt['driver_timestamp'] = [ntp_now]
    rdt['lon'] = [-71]
    rdt['pressure'] = [256.8]

    granule = rdt.to_granule()
    publisher.publish(granule)
def execute_retrieve(self):
    """
    execute_retrieve
    Executes a retrieval and returns the result as a value in lieu of publishing it on a stream

    Raises BadRequest when the coverage cannot be read.
    """
    # Initialized so the finally clause is safe when _get_coverage itself
    # raises; previously `coverage` was unbound in that case and the
    # finally block raised NameError, masking the real BadRequest.
    coverage = None
    try:
        coverage = DatasetManagementService._get_coverage(self.dataset_id, mode="r")
        if coverage.num_timesteps == 0:
            # Nothing ingested yet: return a parameter-complete, value-empty
            # granule rather than failing.
            log.info("Reading from an empty coverage")
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = self._coverage_to_granule(
                coverage=coverage,
                start_time=self.start_time,
                end_time=self.end_time,
                stride_time=self.stride_time,
                parameters=self.parameters,
                tdoa=self.tdoa,
            )
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # (and greenlet kills) propagate instead of becoming BadRequest.
        log.exception("Problems reading from the coverage")
        raise BadRequest("Problems reading from the coverage")
    finally:
        if coverage is not None:
            coverage.close(timeout=5)
    return rdt.to_granule()
def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
    # Out-of-band retrieval: reads a (cached) coverage directly and returns
    # the requested slice as a granule. On any failure the cached coverage
    # is ejected so a corrupt handle is not reused, affected data products
    # are logged, and the error is surfaced as BadRequest.
    # NOTE(review): the coverage is intentionally not closed here —
    # presumably because _get_coverage serves it from a cache; confirm.
    query = query or {}
    coverage = None
    try:
        coverage = cls._get_coverage(dataset_id)
        if coverage is None:
            raise BadRequest('no such coverage')
        if isinstance(coverage, SimplexCoverage) and coverage.is_empty():
            # Empty coverage: return a parameter-complete, value-empty granule.
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            args = {
                'start_time'     : query.get('start_time', None),
                'end_time'       : query.get('end_time', None),
                'stride_time'    : query.get('stride_time', None),
                'parameters'     : query.get('parameters', None),
                'stream_def_id'  : delivery_format,
                'tdoa'           : query.get('tdoa', None),
                'sort_parameter' : query.get('sort_parameter', None)
            }
            rdt = ReplayProcess._cov2granule(coverage=coverage, **args)
    except Exception as e:
        # Drop the cached coverage and log every data product backed by it.
        cls._eject_cache(dataset_id)
        data_products, _ = Container.instance.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
        for data_product in data_products:
            log.error("Data Product %s (%s) had issues reading from the coverage model\nretrieve_oob(dataset_id='%s', query=%s, delivery_format=%s)", data_product.name, data_product._id, dataset_id, query, delivery_format)
        log.error("Problems reading from the coverage", exc_info=True)
        raise BadRequest('Problems reading from the coverage')
    return rdt.to_granule()
def _cov2granule(cls, coverage, start_time=None, end_time=None, stride_time=None, stream_def_id=None, parameters=None, tdoa=None):
    # Converts a slice of a coverage into a RecordDictionaryTool. The slice
    # is either a temporal window (start/end/stride, converted to coverage
    # time) or an explicit domain slice (tdoa); tdoa takes precedence and
    # skips time conversion entirely.
    if tdoa is None:
        if start_time is not None:
            start_time = cls.convert_time(coverage, start_time)
        if end_time is not None:
            end_time = cls.convert_time(coverage, end_time)
        slice_ = slice(start_time, end_time, stride_time)

    # A stream definition restricts the RDT to its available fields;
    # otherwise the full parameter dictionary is used.
    if stream_def_id:
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    else:
        rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)

    if tdoa:
        vdict = coverage.get_value_dictionary(parameters or rdt.fields, domain_slice=tdoa)
    else:
        vdict = coverage.get_value_dictionary(parameters or rdt.fields, temporal_slice=slice_)

    if not vdict:
        log.warning('Retrieve returning empty set')
        return rdt

    # Assign time through the normal RDT path first (it sizes the RDT),
    # then bypass validation for the remaining parameters.
    rdt[coverage.temporal_parameter_name] = vdict[coverage.temporal_parameter_name]
    for k,v in vdict.iteritems():
        if k == coverage.temporal_parameter_name:
            continue
        # The values have already been inside a coverage so we know they're safe and they exist, so they can be inserted directly.
        rdt._rd[k] = v
        #rdt[k] = v
    return rdt
def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
    # Out-of-band retrieval: reads a (cached) coverage directly and returns
    # the requested slice as a granule. On failure the cached coverage is
    # ejected and the error surfaced as BadRequest.
    query = query or {}
    coverage = None
    try:
        coverage = cls._get_coverage(dataset_id)
        if coverage is None:
            # NOTE: this BadRequest is caught below and re-raised with the
            # generic message, preserving the original behavior.
            raise BadRequest('no such coverage')
        if coverage.num_timesteps == 0:
            # Empty coverage: return a parameter-complete, value-empty granule.
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(
                param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = ReplayProcess._coverage_to_granule(
                coverage=coverage,
                start_time=query.get('start_time', None),
                end_time=query.get('end_time', None),
                stride_time=query.get('stride_time', None),
                parameters=query.get('parameters', None),
                stream_def_id=delivery_format,
                tdoa=query.get('tdoa', None))
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # (and greenlet kills) propagate instead of becoming BadRequest.
        cls._eject_cache(dataset_id)
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    return rdt.to_granule()
def test_derived_data_product(self):
    # Creates a parent CTD data product plus a derived (child) product that
    # exposes only time/temp, publishes to the parent, and verifies the
    # derived product's dataset retrieves just those fields.
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

    tdom, sdom = time_series_domain()

    dp = DataProduct(name='Instrument DP',
        temporal_domain=tdom.dump(),
        spatial_domain=sdom.dump())

    dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
    self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

    self.dpsc_cli.activate_data_product_persistence(dp_id)
    self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

    dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
        predicate=PRED.hasDataset,
        id_only=True)
    if not dataset_ids:
        raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
    dataset_id = dataset_ids[0]

    # Make the derived data product
    simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
    tempwat_dp = DataProduct(name='TEMPWAT')
    tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
    self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
    self.dpsc_cli.activate_data_product_persistence(tempwat_dp_id)
    self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, tempwat_dp_id)

    # Check that the streams associated with the data product are persisted with
    stream_ids, _ = self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
    for stream_id in stream_ids:
        self.assertTrue(self.ingestclient.is_persisted(stream_id))
    stream_id = stream_ids[0]

    route = self.pubsubcli.read_stream_route(stream_id=stream_id)

    # Publish a 20-sample granule on the PARENT product's stream.
    rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    rdt['pressure'] = np.arange(20)

    publisher = StandaloneStreamPublisher(stream_id,route)

    # Wait for ingestion via a DatasetModified event on the parent dataset.
    dataset_modified = Event()
    def cb(*args, **kwargs):
        dataset_modified.set()
    es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    publisher.publish(rdt.to_granule())

    self.assertTrue(dataset_modified.wait(30))

    tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
    tempwat_dataset_id = tempwat_dataset_ids[0]
    # Retrieve through the derived stream definition: only time/temp remain.
    granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    self.assertEquals(set(rdt.fields), set(['time','temp']))
def test_granule(self):
    # End-to-end granule round trip: publish an RDT over pub/sub, verify the
    # subscriber callback sees it, then exercise RDT fill-value semantics
    # and granule (de)serialization.
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
    pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
    self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream,stream_id)
    publisher = StandaloneStreamPublisher(stream_id, route)

    subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
    self.pubsub_management.activate_subscription(subscription_id)

    # Ten samples; the RDT must expose exactly the pdict's fields and the
    # stream configuration supplied at definition time.
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['pressure'] = [20] * 10

    self.assertEquals(set(pdict.keys()), set(rdt.fields))
    self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)
    self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

    # Stash expectations for verify_incoming, then publish.
    self.rdt = rdt
    self.data_producer_id = 'data_producer'
    self.provider_metadata_update = {1:1}

    publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

    self.assertTrue(self.event.wait(10))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)

    # All-None assignment is treated as unset; partial None is replaced by
    # the parameter's fill value.
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.array([None,None,None])
    self.assertTrue(rdt['time'] is None)

    rdt['time'] = np.array([None, 1, 2])
    self.assertEquals(rdt['time'][0], rdt.fill_value('time'))

    # An RDT built from a stream-definition OBJECT must round-trip through
    # a granule unchanged.
    stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
    rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)

    granule = rdt.to_granule()
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    np.testing.assert_array_equal(rdt['temp'], np.arange(20))
def test_array_flow_paths(self):
    # Exercises ArrayType parameters through the full publish/ingest/retrieve
    # path, for single and multiple timesteps, including ragged rows that
    # must be padded with the parameter's fill value.
    data_product_id, stream_def_id = self.make_array_data_product()

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dm = DatasetMonitor(dataset_id)
    self.addCleanup(dm.stop)

    # I need to make sure that we can fill the RDT with its values
    # Test for one timestep
    # Test for multiple timesteps
    # Publishes
    # Ingests correctly
    # Retrieves correctly

    #--------------------------------------------------------------------------------
    # Ensure that the RDT can be filled with ArrayType values
    #--------------------------------------------------------------------------------

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temp_sample'] = [[0,1,2,3,4]]
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4]]))

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dm.event.wait(10))
    dm.event.clear()

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4]]))

    #--------------------------------------------------------------------------------
    # Ensure that it deals with multiple values
    #--------------------------------------------------------------------------------

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [1,2,3]
    rdt['temp_sample'] = [[0,1,2,3,4],[1],[5,5,5,5,5]]

    # Short rows are padded to the widest row with the fill value
    # (float32 max when no explicit fill value is defined).
    m = rdt.fill_value('temp_sample') or np.finfo(np.float32).max
    np.testing.assert_equal(m,np.finfo(np.float32).max)
    np.testing.assert_array_equal(rdt['temp_sample'], [[0,1,2,3,4],[1,m,m,m,m],[5,5,5,5,5]])

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dm.event.wait(10))
    dm.event.clear()

    #--------------------------------------------------------------------------------
    # Retrieve and Verify
    #--------------------------------------------------------------------------------

    # All four timesteps (1 from the first publish + 3 from the second).
    retrieved_granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(retrieved_granule)
    np.testing.assert_array_equal(rdt['time'], np.array([0,1,2,3]))
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4],[0,1,2,3,4],[1,m,m,m,m],[5,5,5,5,5]]))
def get_granule(self, time=None, pd=None):
    """Build a granule of salinity/lat/lon, each repeated message_size times.

    `time` selects the sample via get_location/get_value; `pd` is the
    parameter dictionary used to construct the record dictionary.
    """
    latitude, longitude, _ = self.get_location(time)
    salinity = self.get_value(time)
    n = self.message_size

    rdt = RecordDictionaryTool(pd)
    # Each field is the scalar replicated across the whole message.
    for field, scalar in (('salinity', salinity),
                          ('lat', latitude),
                          ('lon', longitude)):
        rdt[field] = array([scalar] * n)
    return rdt.to_granule()
def __init__(self, coverage=None, granule=None):
    """Wrap a coverage (created on demand) and an RDT view of it.

    With no coverage, a fresh one is created. With a coverage, an optional
    granule seeds the RDT via sync_with_granule; otherwise an empty RDT is
    built from the coverage's parameter dictionary.
    """
    if coverage is not None:
        self.coverage = coverage
        if granule is None:
            self.rdt = RecordDictionaryTool(
                param_dictionary=self.coverage.parameter_dictionary)
        else:
            self.sync_with_granule(granule)
    else:
        # No coverage supplied: create one and start with an empty RDT.
        self.coverage = self.create_coverage()
        self.rdt = RecordDictionaryTool(
            param_dictionary=self.coverage.parameter_dictionary)
    self.pdict = self.coverage.parameter_dictionary
def get_last_values(cls, dataset_id, number_of_points):
    # Returns a granule holding the most recent `number_of_points` values
    # of the dataset's coverage (fewer if the coverage is shorter; empty if
    # the coverage has no timesteps yet).
    coverage = DatasetManagementService._get_coverage(dataset_id,mode='r')
    try:
        if coverage.num_timesteps < number_of_points:
            if coverage.num_timesteps == 0:
                # Fixed: this early return previously skipped coverage.close,
                # leaking the read handle for every empty dataset.
                rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
                return rdt.to_granule()
            number_of_points = coverage.num_timesteps
        rdt = cls._coverage_to_granule(coverage,tdoa=slice(-number_of_points,None))
    finally:
        # Always release the coverage, including when _coverage_to_granule raises.
        coverage.close(timeout=5)
    return rdt.to_granule()
def __init__(self, coverage=None, granule=None):
    # Wraps a coverage (created on demand) together with an RDT view of it.
    if coverage is None:
        # No coverage supplied: create one and start with an empty RDT.
        self.coverage = self.create_coverage()
        self.rdt = RecordDictionaryTool(
            param_dictionary=self.coverage.parameter_dictionary)
    else:
        self.coverage = coverage
        if granule is not None:
            # Seed the RDT from the supplied granule.
            self.sync_with_granule(granule)
        else:
            self.rdt = RecordDictionaryTool(
                param_dictionary=self.coverage.parameter_dictionary)
    self.pdict = self.coverage.parameter_dictionary
def get_last_values(cls, dataset_id, number_of_points):
    # Returns a granule holding the most recent `number_of_points` values
    # of the dataset's coverage (fewer if the coverage is shorter; empty if
    # the coverage has no timesteps yet).
    coverage = DatasetManagementService._get_coverage(dataset_id, mode='r')
    try:
        if coverage.num_timesteps < number_of_points:
            if coverage.num_timesteps == 0:
                # Fixed: this early return previously skipped coverage.close,
                # leaking the read handle for every empty dataset.
                rdt = RecordDictionaryTool(
                    param_dictionary=coverage.parameter_dictionary)
                return rdt.to_granule()
            number_of_points = coverage.num_timesteps
        rdt = cls._coverage_to_granule(coverage,
                                       tdoa=slice(-number_of_points, None))
    finally:
        # Always release the coverage, including when _coverage_to_granule raises.
        coverage.close(timeout=5)
    return rdt.to_granule()
def write_to_data_product(self, data_product_id):
    # Publishes 40 samples of synthetic values (per parameter type) to the
    # data product, retrieves them back, and returns the list of fields
    # whose retrieved values do not match what was published.
    dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
    dataset_id = dataset_ids.pop()

    stream_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
    stream_id = stream_ids.pop()

    stream_def_ids, _ = self.resource_registry.find_objects(
        stream_id, 'hasStreamDefinition', id_only=True)
    stream_def_id = stream_def_ids.pop()

    route = self.pubsub_management.read_stream_route(stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    time_param = rdt._pdict.temporal_parameter_name
    if time_param is None:
        # Without a temporal parameter there is nothing to index by; bail out.
        print '%s has no temporal parameter' % self.resource_registry.read(
            data_product_id).name
        return

    # Fill every non-temporal field with type-appropriate synthetic values.
    rdt[time_param] = np.arange(40)
    for field in rdt.fields:
        if field == rdt._pdict.temporal_parameter_name:
            continue
        rdt[field] = self.fill_values(
            rdt._pdict.get_context(field).param_type, 40)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 40)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(granule)

    # Collect the fields that failed the round trip.
    bad = []
    for field in rdt.fields:
        if not np.array_equal(rdt[field], rdt_out[field]):
            print '%s' % field
            print '%s != %s' % (rdt[field], rdt_out[field])
            bad.append(field)
    return bad
def test_array_visualization(self):
    data_product_id, stream_def_id = self.make_array_data_product()

    # Make a granule with an array type, give it a few values
    # Send it to google_dt transform, verify output

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(2208988800, 2208988810)
    rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
    rdt['cond_sample'] = np.arange(10*4).reshape(10,4)
    granule = rdt.to_granule()

    dataset_monitor = DatasetMonitor(self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id))
    self.addCleanup(dataset_monitor.stop)
    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    dataset_monitor.event.wait(10)

    # Run the google_dt transform directly on the granule and compare the
    # resulting table against the expected literal.
    gdt_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('google_dt',id_only=True)
    gdt_stream_def = self.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

    gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(granule, params=gdt_stream_def)

    rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
    # Expected table: each row is [time-offset, temp_sample[0..3],
    # temp_offset[0..3] (2x temp_sample), cond_sample[0..3]].
    testval = {'data_content': [
        [0.0 , 0.0 , 1.0 , 2.0 , 3.0 , 0.0 , 2.0 , 4.0 , 6.0 , 0.0 , 1.0 , 2.0 , 3.0] ,
        [1.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 , 10.0 , 12.0 , 14.0 , 4.0 , 5.0 , 6.0 , 7.0] ,
        [2.0 , 8.0 , 9.0 , 10.0 , 11.0 , 16.0 , 18.0 , 20.0 , 22.0 , 8.0 , 9.0 , 10.0 , 11.0] ,
        [3.0 , 12.0 , 13.0 , 14.0 , 15.0 , 24.0 , 26.0 , 28.0 , 30.0 , 12.0 , 13.0 , 14.0 , 15.0] ,
        [4.0 , 16.0 , 17.0 , 18.0 , 19.0 , 32.0 , 34.0 , 36.0 , 38.0 , 16.0 , 17.0 , 18.0 , 19.0] ,
        [5.0 , 20.0 , 21.0 , 22.0 , 23.0 , 40.0 , 42.0 , 44.0 , 46.0 , 20.0 , 21.0 , 22.0 , 23.0] ,
        [6.0 , 24.0 , 25.0 , 26.0 , 27.0 , 48.0 , 50.0 , 52.0 , 54.0 , 24.0 , 25.0 , 26.0 , 27.0] ,
        [7.0 , 28.0 , 29.0 , 30.0 , 31.0 , 56.0 , 58.0 , 60.0 , 62.0 , 28.0 , 29.0 , 30.0 , 31.0] ,
        [8.0 , 32.0 , 33.0 , 34.0 , 35.0 , 64.0 , 66.0 , 68.0 , 70.0 , 32.0 , 33.0 , 34.0 , 35.0] ,
        [9.0 , 36.0 , 37.0 , 38.0 , 39.0 , 72.0 , 74.0 , 76.0 , 78.0 , 36.0 , 37.0 , 38.0 , 39.0]] ,
        'data_description': [('time', 'number', 'time'),
            ('temp_sample[0]', 'number', 'temp_sample[0]', {'precision': '5'}),
            ('temp_sample[1]', 'number', 'temp_sample[1]', {'precision': '5'}),
            ('temp_sample[2]', 'number', 'temp_sample[2]', {'precision': '5'}),
            ('temp_sample[3]', 'number', 'temp_sample[3]', {'precision': '5'}),
            ('temp_offset[0]', 'number', 'temp_offset[0]', {'precision': '5'}),
            ('temp_offset[1]', 'number', 'temp_offset[1]', {'precision': '5'}),
            ('temp_offset[2]', 'number', 'temp_offset[2]', {'precision': '5'}),
            ('temp_offset[3]', 'number', 'temp_offset[3]', {'precision': '5'}),
            ('cond_sample[0]', 'number', 'cond_sample[0]', {'precision': '5'}),
            ('cond_sample[1]', 'number', 'cond_sample[1]', {'precision': '5'}),
            ('cond_sample[2]', 'number', 'cond_sample[2]', {'precision': '5'}),
            ('cond_sample[3]', 'number', 'cond_sample[3]', {'precision': '5'})],
        'viz_product_type': 'google_dt'}
    self.assertEquals(rdt['google_dt_components'][0], testval)
def cb(msg, sr, sid):
    # Subscriber callback: the message must arrive on the expected output
    # stream, expose exactly the expected fields, and carry no values yet
    # (every parameter reads back as None). Sets `e` to release the waiter.
    self.assertEqual(sid, stream_id_out)
    rdt_out = RecordDictionaryTool.load_from_granule(msg)
    self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
    for k,v in rdt_out.iteritems():
        self.assertEquals(rdt_out[k], None)
    e.set()
def process(self, dataset_id, start_time=0, end_time=0):
    # Scans a dataset in time blocks and flags QC alerts: for every field
    # whose name ends in a known QC suffix, any zero value is treated as a
    # failed check and reported via flag_qc_parameter with its timestamps.
    if not dataset_id:
        raise BadRequest('No dataset id specified.')
    now = time.time()
    start_time = start_time or (now - (3600*(self.run_interval+1))) # Every N hours with 1 of overlap
    end_time   = end_time or now

    # Restrict to configured QC params when given; otherwise use all suffixes.
    qc_params  = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

    self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
    log.debug('Iterating over the data blocks')

    for st,et in self.chop(int(start_time),int(end_time)):
        log.debug('Chopping %s:%s', st, et)
        log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et)
        granule = self.data_retriever.retrieve(dataset_id, query={'start_time':st, 'end_time':et})
        log.debug('Retrieved Data')
        rdt = RecordDictionaryTool.load_from_granule(granule)
        qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])]
        log.debug('QC Fields: %s', qc_fields)
        for field in qc_fields:
            val = rdt[field]
            if val is None:
                continue
            if not np.all(val):
                # At least one zero => QC failure; report failing timestamps.
                log.debug('Found QC Alerts')
                indexes = np.where(val==0)
                timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                self.flag_qc_parameter(dataset_id, field, timestamps.tolist(),{})
def test_append_parameter(self):
    # Verifies that a parameter function (loaded from an egg) can be added
    # to an existing dataset after ingestion and evaluated on retrieval:
    # array_sum = temp + pressure = arange(30)+arange(30) = arange(0,60,2).
    # Make a CTDBP Data Product
    data_product_id = self.make_ctd_data_product()
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
        data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    # Throw some data in it
    rdt = self.ph.rdt_for_data_product(data_product_id)
    rdt['time'] = np.arange(30)
    rdt['temp'] = np.arange(30)
    rdt['pressure'] = np.arange(30)
    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())
    dataset_monitor.event.clear()

    # Grab the egg
    egg_url = self.egg_url
    egg_path = TransformWorker.download_egg(egg_url)
    import pkg_resources
    pkg_resources.working_set.add_entry(egg_path)
    self.addCleanup(os.remove, egg_path)

    # Make a parameter function
    owner = 'ion_example.add_arrays'
    func = 'add_arrays'
    arglist = ['a', 'b']
    pf = ParameterFunction(name='add_arrays',
                           function_type=PFT.PYTHON,
                           owner=owner,
                           function=func,
                           args=arglist)
    pfunc_id = self.dataset_management.create_parameter_function(pf)
    self.addCleanup(self.dataset_management.delete_parameter_function,
                    pfunc_id)

    # Make a context (instance of the function)
    context = ParameterContext(name='array_sum',
                               units="1",
                               fill_value="-9999",
                               parameter_function_id=pfunc_id,
                               parameter_type="function",
                               value_encoding="float32",
                               display_name="Array Summation",
                               parameter_function_map={
                                   'a': 'temp',
                                   'b': 'pressure'
                               })
    #pfunc = DatasetManagementService.get_coverage_function(pf)
    #pfunc.param_map = {'a':'temp', 'b':'pressure'}
    #ctxt = ParameterContext('array_sum', param_type=ParameterFunctionType(pfunc))
    #ctxt_dump = ctxt.dump()
    #ctxt_id = self.dataset_management.create_parameter_context('array_sum', ctxt_dump)
    ctxt_id = self.dataset_management.create_parameter(context)
    self.dataset_management.add_parameter_to_dataset(ctxt_id, dataset_id)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['array_sum'],
                                  np.arange(0, 60, 2))
def test_example_preload(self):
    # Demonstration test: preloads an SBE16 data product, uploads
    # calibration coefficients directly into its coverage, publishes one
    # raw record, then drops into an interactive breakpoint for inspection.
    print 'preloading...'
    self.preload_example1()

    data_product_ids, _ = self.container.resource_registry.find_resources_ext(alt_id='DPROD102', alt_id_ns='PRE')
    data_product_id = data_product_ids[0]
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)

    # Inject calibration coefficients straight into the coverage model.
    with DirectCoverageAccess() as dca:
        dca.upload_calibration_coefficients(dataset_id, 'test_data/sbe16coeffs.csv', 'test_data/sbe16coeffs.yml')

    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    rdt = ph.rdt_for_data_product(data_product_id)
    # One raw sample; time is Unix time shifted to the NTP epoch.
    rdt['time'] = [time.time() + 2208988800]
    rdt['temperature'] = [248471]
    rdt['pressure'] = [528418]
    rdt['conductivity'] = [1673175]
    rdt['thermistor_temperature']=[24303]
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    dataset_monitor.event.wait(10)
    g = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(g)

    breakpoint(locals())
def test_instrument_simple(self):
    # Full instrument-agent lifecycle: create model/agent/device, start the
    # agent, drive it to COMMAND state, acquire ten samples one at a time,
    # and verify ten records land in the parsed dataset.
    instrument_model_id = self.create_instrument_model()
    instrument_agent_id = self.create_instrument_agent(instrument_model_id)
    instrument_device_id = self.create_instrument_device(instrument_model_id)
    instrument_agent_instance_id = self.create_instrument_agent_instance(instrument_agent_id, instrument_device_id)

    raw_dp_id, parsed_dp_id = self.create_instrument_data_products(instrument_device_id)

    self.start_instrument_agent_instance(instrument_agent_instance_id)

    agent_process_id = self.poll_instrument_agent_instance(instrument_agent_instance_id, instrument_device_id)

    agent_client = ResourceAgentClient(instrument_device_id,
            to_name=agent_process_id,
            process=FakeProcess())

    # Walk the agent state machine up to COMMAND.
    self.agent_state_transition(agent_client, ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE)
    self.agent_state_transition(agent_client, ResourceAgentEvent.GO_ACTIVE, ResourceAgentState.IDLE)
    self.agent_state_transition(agent_client, ResourceAgentEvent.RUN, ResourceAgentState.COMMAND)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(parsed_dp_id)

    # One ACQUIRE_SAMPLE per iteration, each confirmed by ingestion.
    for i in xrange(10):
        monitor = DatasetMonitor(dataset_id=dataset_id)
        agent_client.execute_resource(AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE))
        if not monitor.event.wait(30):
            raise AssertionError('Failed on the %ith granule' % i)
        monitor.stop()

    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
    self.assertEquals(len(rdt), 10)
def check_tempsf_instrument_data_product(self, reference_designator):
    # Round-trips one TMPSF record (a 24-element temperature array) through
    # publish/ingest/retrieve and accumulates pass/fail into `passing`.
    # NOTE(review): `passing &= self.assertTrue(...)` relies on assertTrue
    # returning a boolean; stock unittest assertTrue returns None, so this
    # presumably uses an overridden assertTrue in this suite — confirm.
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing: return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    now = time.time()
    ntp_now = now + 2208988800

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['temperature'] = [[ 25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
        29.4434, 26.9873, 15.2883, 16.3374, 14.5883, 15.7253, 18.4383,
        15.3488, 17.2993, 10.2111, 11.5993, 10.9345, 9.4444, 9.9876,
        10.9834, 11.0098, 5.3456, 4.2994, 4.3009]]
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.event.wait(20))
    if not passing: return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
    passing &= self.assert_array_almost_equal(rdt['temperature'], [[
        25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
        15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
        10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
        5.3456, 4.2994, 4.3009]])
    return passing
def test_instrument_simple(self):
    """End-to-end instrument test (DatasetMonitor.wait variant): build the
    resource chain, start the agent, drive it to COMMAND, acquire ten samples,
    and verify all ten records are retrievable.
    """
    instrument_model_id = self.create_instrument_model()
    instrument_agent_id = self.create_instrument_agent(instrument_model_id)
    instrument_device_id = self.create_instrument_device(instrument_model_id)
    instrument_agent_instance_id = self.create_instrument_agent_instance(instrument_agent_id, instrument_device_id)
    raw_dp_id, parsed_dp_id = self.create_instrument_data_products(instrument_device_id)
    self.start_instrument_agent_instance(instrument_agent_instance_id)
    agent_process_id = self.poll_instrument_agent_instance(instrument_agent_instance_id, instrument_device_id)
    agent_client = ResourceAgentClient(instrument_device_id, to_name=agent_process_id, process=FakeProcess())
    # Walk the agent through the standard state machine up to COMMAND
    self.agent_state_transition(agent_client, ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE)
    self.agent_state_transition(agent_client, ResourceAgentEvent.GO_ACTIVE, ResourceAgentState.IDLE)
    self.agent_state_transition(agent_client, ResourceAgentEvent.RUN, ResourceAgentState.COMMAND)
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(parsed_dp_id)
    for i in xrange(10):
        monitor = DatasetMonitor(dataset_id=dataset_id)
        try:
            agent_client.execute_resource(AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE))
            if not monitor.wait():
                raise AssertionError('Failed on the %ith granule' % i)
        finally:
            # Always tear the monitor down, even when the wait times out,
            # so its subscriber does not leak across iterations/failures.
            monitor.stop()
    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
    self.assertEquals(len(rdt), 10)
def test_example2_preload(self): print 'preloading...' self.preload_example2() data_product_ids, _ = self.container.resource_registry.find_resources_ext(alt_id='DPROD104', alt_id_ns='PRE') data_product_id = data_product_ids[0] dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id) with DirectCoverageAccess() as dca: dca.upload_calibration_coefficients(dataset_id, 'test_data/vel3d_coeff.csv', 'test_data/vel3d_coeff.yml') from ion_functions.data.test.test_vel_functions import TS, VE, VN, VU rdt = ParameterHelper.rdt_for_data_product(data_product_id) rdt['time'] = [time.time() + 2208988800] rdt['velocity_east'] = [VE[0]] rdt['velocity_north'] = [VN[0]] rdt['velocity_up'] = [VU[0]] dataset_monitor = DatasetMonitor(dataset_id) self.addCleanup(dataset_monitor.stop) ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt) dataset_monitor.event.wait(10) g = self.data_retriever.retrieve(dataset_id) rdt = RecordDictionaryTool.load_from_granule(g) breakpoint(locals())
def verify_incoming(self, m, r, s):
    """Subscriber callback: the received granule must match the published RDT
    and carry the expected producer/metadata headers; signals self.event when done.
    """
    received = RecordDictionaryTool.load_from_granule(m)
    self.assertEquals(received, self.rdt)
    self.assertEquals(m.data_producer_id, self.data_producer_id)
    self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
    self.assertNotEqual(m.creation_timestamp, None)
    self.event.set()
def test_lctest_preload(self):
    """Exercise sparse parameter types: publish one record with sparse values,
    then ten time-only records, and drop into an interactive breakpoint for
    manual inspection of the retrieved data.
    """
    self.preload_lctest()
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sparse_dict', id_only=True)
    stream_def_id = self.create_stream_definition('sparse_example', parameter_dictionary_id=pdict_id)
    data_product_id = self.create_data_product('sparse_example', stream_def_id=stream_def_id)
    self.activate_data_product(data_product_id)
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    rdt = ParameterHelper.rdt_for_data_product(data_product_id)
    rdt['time'] = [time.time() + 2208988800]  # NTP epoch offset
    rdt['sparse_float'] = [3.14159265358979323]
    rdt['sparse_double'] = [2.7182818284590452353602874713526624977572470936999595]
    rdt['sparse_int'] = [131071] # 6th mersenne prime
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    # Fail loudly on timeout instead of proceeding with missing data
    # (the wait results were previously ignored).
    self.assertTrue(dataset_monitor.event.wait(10))
    for i in xrange(10):
        dataset_monitor.event.clear()
        rdt = ParameterHelper.rdt_for_data_product(data_product_id)
        rdt['time'] = [time.time() + 2208988800]
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.event.wait(10))
    g = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(g)
    breakpoint(locals())
def test_transform_prime_no_available_fields(self):
    """With empty available_fields on both stream definitions, TransformPrime
    should emit granules whose fields are all None.
    """
    available_fields_in = []
    available_fields_out = []
    exchange_pt1 = 'xp1'
    exchange_pt2 = 'xp2'
    stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)
    # Launch the transform process with a route from the input to the output stream
    config = {'process': {'routes': {(stream_id_in, stream_id_out): None}, 'queue_name': exchange_pt1, 'publish_streams': {str(stream_id_out): stream_id_out}, 'process_type': 'stream_process'}}
    pid = self.container.spawn_process('transform_stream', 'ion.processes.data.transforms.transform_prime', 'TransformPrime', config)
    # Create a publisher and bind the transform's subscriber queue to it
    publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
    self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)
    # Build the input data: 20 samples of L0 CTD values plus position
    rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
    dt = 20
    rdt_in['time'] = np.arange(dt)
    rdt_in['lat'] = [40.992469] * dt
    rdt_in['lon'] = [-71.727069] * dt
    rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
    rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
    rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
    msg = rdt_in.to_granule()
    # Publish the granule to the transform and have the transform publish it
    # to the subscriber below, which validates the transformed data.
    e = gevent.event.Event()
    def cb(msg, sr, sid):
        # With no available fields, the output field set is empty and every
        # field value must be None.
        self.assertEqual(sid, stream_id_out)
        rdt_out = RecordDictionaryTool.load_from_granule(msg)
        self.assertEquals(set([k for k, v in rdt_out.iteritems()]), set(available_fields_out))
        for k, v in rdt_out.iteritems():
            self.assertEquals(rdt_out[k], None)
        e.set()
    sub = StandaloneStreamSubscriber('stream_subscriber', cb)
    sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
    self.addCleanup(sub.stop)
    sub.start()
    # Publish the message to the transform
    publisher.publish(msg)
    # Wait to receive the transformed message
    self.assertTrue(e.wait(4))
def verify_incoming(self, m, r, s):
    """Subscriber callback: every field of the received granule must equal the
    corresponding field of the published RDT; signals self.event when done.
    """
    incoming = RecordDictionaryTool.load_from_granule(m)
    for field, values in incoming.iteritems():
        np.testing.assert_array_equal(values, self.rdt[field])
    self.assertEquals(m.data_producer_id, self.data_producer_id)
    self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
    self.assertNotEqual(m.creation_timestamp, None)
    self.event.set()
def verify_incoming(self, m, r, s):
    """Subscriber callback: check granule equality and header fields, then
    signal completion via self.event.
    """
    granule = m
    received_rdt = RecordDictionaryTool.load_from_granule(granule)
    self.assertEquals(received_rdt, self.rdt)
    self.assertEquals(granule.data_producer_id, self.data_producer_id)
    self.assertEquals(granule.provider_metadata_update, self.provider_metadata_update)
    self.assertNotEqual(granule.creation_timestamp, None)
    self.event.set()
def create_lookup_rdt(self):
    """Create a stream definition over the lookup parameter dictionary (with a
    fixed reference designator) and return an empty RDT bound to it.
    """
    param_helper = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = param_helper.create_lookups()
    stream_def_id = self.pubsub_management.create_stream_definition(
        'lookup',
        parameter_dictionary_id=pdict_id,
        stream_configuration={'reference_designator': "GA03FLMA-RI001-13-CTDMOG999"})
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
    return RecordDictionaryTool(stream_definition_id=stream_def_id)
def test_filter(self):
    """A stream definition restricted to ['time', 'temp'] must reject writes
    to other fields, and the filter must survive a granule round-trip.
    """
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    filtered_stream_def_id = self.pubsub_management.create_stream_definition(
        'filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
    self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
    rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
    self.assertEquals(rdt._available_fields, ['time', 'temp'])
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    # Fields outside the filter must be rejected
    with self.assertRaises(KeyError):
        rdt['pressure'] = np.arange(20)
    rdt2 = RecordDictionaryTool.load_from_granule(rdt.to_granule())
    # The filter and field set survive serialization
    self.assertEquals(rdt._available_fields, rdt2._available_fields)
    self.assertEquals(rdt.fields, rdt2.fields)
    for field, _ in rdt.iteritems():
        self.assertTrue(np.array_equal(rdt[field], rdt2[field]))
def verifier(msg, route, stream_id):
    """Stream callback: every populated value in the granule, both in the raw
    record dictionary and after loading into an RDT, must be a numpy ndarray.
    Signals the closure-captured `verified` Event when all checks pass.
    """
    # Raw record-dictionary values may be None (unset); any set value must be an ndarray
    for k, v in msg.record_dictionary.iteritems():
        if v is not None:
            self.assertIsInstance(v, np.ndarray)
    rdt = RecordDictionaryTool.load_from_granule(msg)
    for field in rdt.fields:
        self.assertIsInstance(rdt[field], np.ndarray)
    # `verified` is an Event captured from the enclosing test's scope
    verified.set()
def retrieve_oob(cls, dataset_id='', query=None, delivery_format=''):
    """Out-of-band retrieval: read a dataset's coverage and return its contents
    as a granule, without going through a replay process/stream.

    query may contain start_time, end_time, stride_time, parameters and tdoa;
    delivery_format is a stream definition id passed through to the granule
    builder. Raises BadRequest on any read failure, after ejecting the
    coverage from the class-level cache.

    NOTE(review): the coverage is intentionally not closed here — it appears
    to be cached at class level (see cls._eject_cache on error) — TODO confirm
    the cache owns the coverage lifecycle.
    """
    query = query or {}
    coverage = None
    try:
        coverage = cls._get_coverage(dataset_id)
        if coverage is None:
            raise BadRequest('no such coverage')
        if coverage.num_timesteps == 0:
            # Empty coverage: return a granule with the right schema but no rows
            log.info('Reading from an empty coverage')
            rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
        else:
            rdt = ReplayProcess._coverage_to_granule(coverage=coverage,
                                                     start_time=query.get('start_time', None),
                                                     end_time=query.get('end_time', None),
                                                     stride_time=query.get('stride_time', None),
                                                     parameters=query.get('parameters', None),
                                                     stream_def_id=delivery_format,
                                                     tdoa=query.get('tdoa', None))
    except:
        # Drop the (possibly corrupt) cached coverage before surfacing the error
        cls._eject_cache(dataset_id)
        log.exception('Problems reading from the coverage')
        raise BadRequest('Problems reading from the coverage')
    return rdt.to_granule()
def verifier(msg, route, stream_id):
    """Stream callback: assert that all populated granule values are numpy
    ndarrays (raw record dictionary and RDT views alike), then signal the
    closure-captured `verified` Event.
    """
    # Unset fields are None; everything else must be an ndarray
    for k, v in msg.record_dictionary.iteritems():
        if v is not None:
            self.assertIsInstance(v, np.ndarray)
    rdt = RecordDictionaryTool.load_from_granule(msg)
    for field in rdt.fields:
        self.assertIsInstance(rdt[field], np.ndarray)
    # `verified` is an Event captured from the enclosing test's scope
    verified.set()
def check_rsn_instrument_data_product(self):
    """Verify RSN instrument data products: check the PREST-A parsed product's
    stream definition, reference designator and parameter naming, then run the
    per-instrument check_* helpers, and finally verify persistence can be
    activated/suspended around a retrieval. Returns an accumulated boolean.
    """
    passing = True
    # for RS03AXBS-MJ03A-06-PRESTA301 (PREST-A) there are a few listed data products
    # Parsed, Engineering
    # SFLPRES-0 SFLPRES-1
    # Check for the two data products and make sure they have the proper parameters
    # SFLPRES-0 should
    data_products, _ = self.RR.find_resources_ext(alt_id_ns='PRE', alt_id='RS03AXBS-MJ03A-06-PRESTA301_SFLPRES_L0_DPID', id_only=True)
    passing &= self.assertTrue(len(data_products) == 1)
    if not data_products:
        return passing
    data_product_id = data_products[0]
    stream_defs, _ = self.RR.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=False)
    passing &= self.assertTrue(len(stream_defs) == 1)
    if not stream_defs:
        return passing
    # Assert that the stream definition has the correct reference designator
    stream_def = stream_defs[0]
    passing &= self.assertEquals(stream_def.stream_configuration['reference_designator'], 'RS03AXBS-MJ03A-06-PRESTA301')
    # Get the pdict and make sure that the parameters corresponding to the available fields
    # begin with the appropriate data product identifier
    pdict_ids, _ = self.RR.find_objects(stream_def, PRED.hasParameterDictionary, id_only=True)
    passing &= self.assertEquals(len(pdict_ids), 1)
    if not pdict_ids:
        return passing
    pdict_id = pdict_ids[0]
    pdict = DatasetManagementService.get_parameter_dictionary(pdict_id)
    available_params = [pdict.get_context(i) for i in pdict.keys() if i in stream_def.available_fields]
    for p in available_params:
        if p.name == 'time':  # Ignore the domain parameter
            continue
        passing &= self.assertTrue(p.ooi_short_name.startswith('SFLPRES'))
    # Run the per-instrument data product checks
    passing &= self.check_presta_instrument_data_products('RS01SLBS-MJ01A-06-PRESTA101')
    passing &= self.check_vel3d_instrument_data_products('RS01SLBS-MJ01A-12-VEL3DB101')
    passing &= self.check_presta_instrument_data_products('RS03AXBS-MJ03A-06-PRESTA301')
    passing &= self.check_vel3d_instrument_data_products('RS03AXBS-MJ03A-12-VEL3DB301')
    passing &= self.check_tempsf_instrument_data_product('RS03ASHS-MJ03B-07-TMPSFA301')
    passing &= self.check_vel3d_instrument_data_products('RS03INT2-MJ03D-12-VEL3DB304')
    passing &= self.check_trhph_instrument_data_products('RS03INT1-MJ03C-10-TRHPHA301')
    self.data_product_management.activate_data_product_persistence(data_product_id)
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
    self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)
    self.data_product_management.suspend_data_product_persistence(data_product_id)  # Should do nothing and not raise anything
    return passing
def rdt_to_granule(self, context, value_array, comp_val=None):
    """Round-trip *value_array* through a single-parameter RDT/granule and
    compare the result against *comp_val* (or the original values).
    """
    pdict = ParameterDictionary()
    pdict.add_context(context)
    rdt = RecordDictionaryTool(param_dictionary=pdict)
    rdt['test'] = value_array
    round_tripped = RecordDictionaryTool.load_from_granule(rdt.to_granule())
    expected = value_array if comp_val is None else comp_val
    actual = round_tripped['test']
    # Strings compare with plain equality; everything else element-wise
    if isinstance(expected, basestring):
        self.assertEquals(expected, actual)
    else:
        np.testing.assert_array_equal(expected, actual)
def test_add_parameter_function(self):
    """Attach a Python parameter function (from an egg) to an existing CTD
    data product via a data process, and verify the derived 'array_sum'
    parameter is computed on retrieval; also verify the definition can be
    inspected and its source URL resolved.
    """
    # req-tag: NEW SA - 31
    # Make a CTDBP Data Product
    data_product_id = self.make_ctd_data_product()
    self.data_product_id = data_product_id
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    # Throw some data in it
    rdt = self.ph.rdt_for_data_product(data_product_id)
    rdt['time'] = np.arange(30)
    rdt['temp'] = np.arange(30)
    rdt['pressure'] = np.arange(30)
    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())
    dataset_monitor.event.clear()
    #--------------------------------------------------------------------------------
    # This is what the user defines either via preload or through the UI
    #--------------------------------------------------------------------------------
    # Where the egg is
    egg_url = self.egg_url
    # Make a parameter function
    owner = 'ion_example.add_arrays'
    func = 'add_arrays'
    arglist = ['a', 'b']
    pf = ParameterFunction(name='add_arrays', function_type=PFT.PYTHON, owner=owner, function=func, args=arglist, egg_uri=egg_url)
    pfunc_id = self.dataset_management.create_parameter_function(pf)
    #--------------------------------------------------------------------------------
    self.addCleanup(self.dataset_management.delete_parameter_function, pfunc_id)
    # Make a data process definition
    dpd = DataProcessDefinition(name='add_arrays', description='Sums two arrays')
    dpd_id = self.data_process_management.create_data_process_definition(dpd, pfunc_id)
    # TODO: assert assoc exists
    # Map the function args onto existing product parameters
    argmap = {'a': 'temp', 'b': 'pressure'}
    dp_id = self.data_process_management.create_data_process(dpd_id, [data_product_id], argument_map=argmap, out_param_name='array_sum')
    # Verify that the function worked!
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    # temp + pressure = arange(30) + arange(30) = even numbers 0..58
    np.testing.assert_array_equal(rdt['array_sum'], np.arange(0, 60, 2))
    # Verify that we can inspect it as well
    source_code = self.data_process_management.inspect_data_process_definition(dpd_id)
    self.assertEquals(source_code, 'def add_arrays(a, b):\n    return a+b\n')
    url = self.data_process_management.get_data_process_definition_url(dpd_id)
    self.assertEquals(url, 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')
    dpd_ids, _ = self.resource_registry.find_resources(name='dataqc_spiketest', restype=RT.DataProcessDefinition, id_only=True)
    dpd_id = dpd_ids[0]
    url = self.data_process_management.get_data_process_definition_url(dpd_id)
    self.assertEquals(url, 'https://github.com/ooici/ion-functions/blob/master/ion_functions/qc/qc_functions.py')
def validator(msg, route, stream_id):
    """Stream callback: check the transformed L1 CTD values against expected
    constants; only signals the closure-captured `validation_event` when all
    three parameters match (silently returns otherwise so the test times out).
    """
    rdt = RecordDictionaryTool.load_from_granule(msg)
    if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
        return
    if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
        return
    if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
        return
    validation_event.set()
def rdt_to_granule(self, context, value_array, comp_val=None):
    """Verify that *value_array* survives an RDT -> granule -> RDT round trip;
    compare against *comp_val* when given, otherwise the original values.
    """
    pdict = ParameterDictionary()
    pdict.add_context(context)
    source_rdt = RecordDictionaryTool(param_dictionary=pdict)
    source_rdt["test"] = value_array
    loaded = RecordDictionaryTool.load_from_granule(source_rdt.to_granule())
    actual = loaded["test"]
    expected = value_array if comp_val is None else comp_val
    if isinstance(expected, basestring):
        # String values: direct equality
        self.assertEquals(expected, actual)
    else:
        # Numeric/array values: element-wise comparison
        np.testing.assert_array_equal(expected, actual)
def check_presta_instrument_data_products(self, reference_designator):
    """Publish a known PREST absolute-pressure value and verify the derived
    seafloor pressure (a parameter function) both before persistence and
    after a retrieval round trip. Returns an accumulated boolean.
    """
    # Check the parsed data product make sure it's got everything it needs and can be published persisted etc.
    # Absolute Pressure (SFLPRES_L0) is what comes off the instrument, SFLPRES_L1 is a pfunc
    # Let's go ahead and publish some fake data!!!
    # According to https://alfresco.oceanobservatories.org/alfresco/d/d/workspace/SpacesStore/63e16865-9d9e-4b11-b0b3-d5658faa5080/1341-00230_Data_Product_Spec_SFLPRES_OOI.pdf
    # Appendix A. Example 1.
    # p_psia_tide = 14.8670
    # the tide should be 10.2504
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()
    now = time.time()
    # Convert the UNIX epoch timestamp to the NTP epoch (1900-01-01 offset)
    ntp_now = now + 2208988800.
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['absolute_pressure'] = [14.8670]
    # The derived seafloor pressure is computed on access, pre-persistence
    passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())  # Bumped to 20 to keep buildbot happy
    if not passing:
        return passing
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
    passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
    passing &= self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)
    return passing
def create_rdt(self):
    """Build a parameter dictionary from the parameter-function contexts,
    wrap it in a stream definition, and return an empty RDT bound to it.
    """
    contexts, pfuncs = self.create_pfuncs()
    context_ids = list(contexts.itervalues())
    pdict_id = self.dataset_management.create_parameter_dictionary(
        name='functional_pdict',
        parameter_context_ids=context_ids,
        temporal_context='test_TIME')
    self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
    stream_def_id = self.pubsub_management.create_stream_definition(
        'functional', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
    return RecordDictionaryTool(stream_definition_id=stream_def_id)
def populate_vectors(self, stream_def_id, hours, temp_vector):
    """Yield one RDT per hour covering the last *hours* hours (NTP time,
    one sample per second), with temperatures from temp_vector(3600).
    """
    # NTP epoch = UNIX epoch + 70-year offset in seconds
    ntp_now = time.time() + 2208988800
    for hour_index in xrange(hours):
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        window_start = ntp_now - 3600 * (hours - hour_index)
        window_end = ntp_now - 3600 * (hours - hour_index - 1)
        rdt['time'] = np.arange(window_start, window_end)
        rdt['temp'] = temp_vector(3600)
        yield rdt
def process(self, dataset_id, start_time=0, end_time=0):
    """QC post-processing: retrieve the dataset in time-chopped blocks, scan
    all *_qc parameter fields, and flag any timestamps whose QC value is 0.

    start_time/end_time default to the last run_interval+1 hours (one hour of
    overlap with the previous run). Raises BadRequest if no dataset_id is
    given or if retrieval fails.
    """
    if not dataset_id:
        raise BadRequest('No dataset id specified.')
    now = time.time()
    start_time = start_time or (now - (3600 * (self.run_interval + 1)))  # Every N hours with 1 of overlap
    end_time = end_time or now
    # Restrict to configured QC suffixes; fall back to all known suffixes
    qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes
    self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
    log.debug('Iterating over the data blocks')
    for st, et in self.chop(int(start_time), int(end_time)):
        log.debug('Chopping %s:%s', st, et)
        log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et)
        try:
            granule = self.data_retriever.retrieve(dataset_id, query={'start_time': st, 'end_time': et})
        except BadRequest:
            # Log which data products were affected before re-raising
            data_products, _ = self.container.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
            for data_product in data_products:
                log.exception('Failed to perform QC Post Processing on %s', data_product.name)
                log.error('Calculated Start Time: %s', st)
                log.error('Calculated End Time: %s', et)
            raise
        log.debug('Retrieved Data')
        rdt = RecordDictionaryTool.load_from_granule(granule)
        qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])]
        log.debug('QC Fields: %s', qc_fields)
        for field in qc_fields:
            val = rdt[field]
            if val is None:
                continue
            # A QC value of 0 marks a failed check; collect the timestamps
            # of all failing samples and flag them.
            if not np.all(val):
                log.debug('Found QC Alerts')
                indexes = np.where(val == 0)
                timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {})
def recv_packet(self, msg, route, stream_id):
    """Cache the latest value of each numeric field from the incoming granule
    in the stored-value service under self.document_key.

    Only float- and int-typed fields are kept; other dtypes are skipped.
    """
    rdt = RecordDictionaryTool.load_from_granule(msg)
    document = {}
    for k, v in rdt.iteritems():
        value_array = np.atleast_1d(v[:])
        if value_array.size == 0:
            # No samples for this field; previously value_array[-1] would
            # raise IndexError on an empty array.
            continue
        if 'f' in value_array.dtype.str:
            document[k] = float(value_array[-1])
        elif 'i' in value_array.dtype.str:
            document[k] = int(value_array[-1])
    self.stored_value_manager.stored_value_cas(self.document_key, document)
def rdt_to_granule(self, context, value_array, comp_val=None):
    """Round-trip *value_array* through an RDT with an explicit temporal
    parameter and compare the reloaded values against *comp_val* (or the
    original array).
    """
    # Note: renamed from `time` to avoid shadowing the stdlib module locally
    time_ctx = ParameterContext(name='time', param_type=QuantityType(value_encoding=np.float64))
    pdict = ParameterDictionary()
    pdict.add_context(time_ctx, is_temporal=True)
    pdict.add_context(context)
    rdt = RecordDictionaryTool(param_dictionary=pdict)
    rdt['time'] = np.arange(len(value_array))
    rdt['test'] = value_array
    reloaded = RecordDictionaryTool.load_from_granule(rdt.to_granule())
    expected = value_array if comp_val is None else comp_val
    actual = reloaded['test']
    if isinstance(expected, basestring):
        self.assertEquals(expected, actual)
    else:
        np.testing.assert_array_equal(expected, actual)
def test_execute_transform(self):
    """Publish one L0 CTD record into the transform's input stream and verify
    the L1 outputs (temperature, conductivity, pressure) via the validator
    callback.
    """
    streams = self.setup_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_def_id = streams[1]
    validation_event = Event()
    def validator(msg, route, stream_id):
        # Only set the event when all three L1 values match; otherwise the
        # final wait() below times out and the test fails.
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
            return
        if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
            return
        if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
            return
        validation_event.set()
    self.setup_validator(validator)
    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)
    # Raw L0 counts plus position, as produced by the instrument
    outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]
    outbound_granule = outbound_rdt.to_granule()
    publisher.publish(outbound_granule)
    self.assertTrue(validation_event.wait(2))
def _data_dict_to_rdt(cls, data_dict, stream_def_id, coverage):
    """Build an RDT from a raw coverage data dictionary.

    Prefers the stream definition's schema when given, otherwise falls back
    to the coverage's parameter dictionary. Returns an empty RDT when
    data_dict is empty or has a zero-length time axis.

    NOTE(review): the empty-axis check uses the literal key 'time' while the
    rest of the function uses coverage.temporal_parameter_name — if the
    temporal parameter is not named 'time' the check is skipped; confirm
    whether that is intended.
    """
    if stream_def_id:
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    else:
        rdt = RecordDictionaryTool(param_dictionary=coverage.parameter_dictionary)
    if not data_dict:
        log.warning('Retrieve returning empty set')
        return rdt
    if 'time' in data_dict and data_dict['time'].shape[0] == 0:
        log.warning('Retrieve returning empty set')
        return rdt
    rdt[coverage.temporal_parameter_name] = data_dict[coverage.temporal_parameter_name]
    for field in rdt.fields:
        if field == coverage.temporal_parameter_name:
            continue
        # The values have already been inside a coverage so we know they're
        # safe and they exist, so they can be inserted directly into the
        # backing dict, bypassing __setitem__ validation.
        if field in data_dict:
            rdt._rd[field] = data_dict[field]
    return rdt
def _replay(self):
    """Generator: read the dataset's coverage once and yield it back out in
    RDT batches of self.publish_limit records each.

    NOTE(review): integer division means a trailing partial batch
    (elements % publish_limit records) is not yielded — preserved as-is,
    confirm whether that is intended.
    """
    coverage = DatasetManagementService._get_coverage(self.dataset_id, mode='r')
    try:
        rdt = self._cov2granule(coverage=coverage,
                                start_time=self.start_time,
                                end_time=self.end_time,
                                stride_time=self.stride_time,
                                parameters=self.parameters,
                                stream_def_id=self.stream_def_id)
        elements = len(rdt)
        for i in xrange(elements / self.publish_limit):
            outgoing = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            fields = self.parameters or outgoing.fields
            for field in fields:
                v = rdt[field]
                if v is not None:
                    outgoing[field] = v[(i * self.publish_limit):((i + 1) * self.publish_limit)]
            yield outgoing
    finally:
        # Close the coverage even if the consumer abandons the generator or
        # an error occurs mid-replay (previously only closed on normal exit).
        coverage.close(timeout=5)
    return
def check_tempsf_instrument_data_product(self, reference_designator):
    """Publish one canned 24-element TMPSF temperature record and verify it
    round-trips through persistence/retrieval unchanged. Returns an
    accumulated boolean rather than raising, like the other check_* helpers.
    (A near-duplicate of this helper exists elsewhere in the file using
    dataset_monitor.event.wait(20) instead of dataset_monitor.wait().)
    """
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()
    now = time.time()
    # Convert the UNIX epoch timestamp to the NTP epoch (1900-01-01 offset)
    ntp_now = now + 2208988800
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['temperature'] = [[
        25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
        15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
        10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
        5.3456, 4.2994, 4.3009]]
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.wait())
    if not passing:
        return passing
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
    passing &= self.assert_array_almost_equal(rdt['temperature'], [[
        25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
        15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
        10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
        5.3456, 4.2994, 4.3009]])
    return passing