def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) conductivity = rdt['conductivity'] pressure = rdt['pressure'] temperature = rdt['temp'] longitude = rdt['lon'] if rdt['lon'] is not None else 0 latitude = rdt['lat'] if rdt['lat'] is not None else 0 sp = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure) log.debug("Density algorithm calculated the sp (practical salinity) values: %s", sp) sa = SA_from_SP(sp, pressure, longitude, latitude) log.debug("Density algorithm calculated the sa (actual salinity) values: %s", sa) dens_value = rho(sa, temperature, pressure) for key, value in rdt.iteritems(): if key in out_rdt: if key=='conductivity' or key=='temp' or key=='pressure': continue out_rdt[key] = value[:] out_rdt['density'] = dens_value log.debug("Density algorithm returning density values: %s", out_rdt['density']) return out_rdt.to_granule()
def _get_new_ctd_packet(self, length): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id) #Explicitly make these numpy arrays... c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)]) t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)]) p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)]) lat = numpy.array( [random.uniform(-90.0, 90.0) for i in xrange(length)]) lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) start_time = ntplib.system_to_ntp_time(time.time()) - (length + 1) tvar = numpy.array([start_time + i for i in xrange(1, length + 1)]) rdt['time'] = tvar rdt['lat'] = lat rdt['lon'] = lon rdt['temp'] = t rdt['conductivity'] = c rdt['pressure'] = p # rdt['coordinates'] = rdt0 # rdt['data'] = rdt1 g = rdt.to_granule(data_producer_id=self.id) return g
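The packet builders above anchor their time axis so that the newest sample lands at roughly the current NTP time. A standalone sketch of just that time-axis construction, using the same ntplib/numpy calls the function relies on:

import time
import numpy
import ntplib

length = 10
# Same construction as _get_new_ctd_packet: back the start time off so the last of the
# `length` samples falls near "now" (times are in NTP epoch seconds).
start_time = ntplib.system_to_ntp_time(time.time()) - (length + 1)
tvar = numpy.array([start_time + i for i in range(1, length + 1)])
print(tvar)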
def _get_data(cls, config): new_flst = get_safe(config, 'constraints.new_files', []) hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE) for f in new_flst: try: parser = SlocumParser(f[0], hdr_cnt) #CBM: Not in use yet... # ext_dset_res = get_safe(config, 'external_dataset_res', None) # t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension'] # x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension'] # y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension'] # z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension'] # var_lst = ext_dset_res.dataset_description.parameters['variables'] max_rec = get_safe(config, 'max_records', 1) dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer') #tx_yml = get_safe(config, 'taxonomy') #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary')) cnt = calculate_iteration_count(len(parser.sensor_map), max_rec) for x in xrange(cnt): #rdt = RecordDictionaryTool(taxonomy=ttool) rdt = RecordDictionaryTool(param_dictionary=pdict) for name in parser.sensor_map: d = parser.data_map[name][x*max_rec:(x+1)*max_rec] rdt[name]=d #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt) g = rdt.to_granule() yield g except SlocumParseException as spe: # TODO: Decide what to do here, raise an exception or carry on log.error('Error parsing data file: \'{0}\''.format(f))
def _get_new_ctd_packet(self, length): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id) #Explicitly make these numpy arrays... c = numpy.array([random.uniform(0.0,75.0) for i in xrange(length)]) t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)]) p = numpy.array([random.lognormvariate(1,2) for i in xrange(length)]) lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)]) lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) start_time = ntplib.system_to_ntp_time(time.time()) - (length + 1) tvar = numpy.array([start_time + i for i in xrange(1,length+1)]) rdt['time'] = tvar rdt['lat'] = lat rdt['lon'] = lon rdt['temp'] = t rdt['conductivity'] = c rdt['pressure'] = p # rdt['coordinates'] = rdt0 # rdt['data'] = rdt1 g = rdt.to_granule(data_producer_id=self.id) return g
def execute(input=None, context=None, config=None, params=None, state=None): ''' @param input Granule @retval result Granule ''' rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) conductivity = rdt['conductivity'] cond_value = (conductivity / 100000.0) - 0.5 for key, value in rdt.iteritems(): if key in out_rdt: out_rdt[key] = value[:] # Update the conductivity values out_rdt['conductivity'] = cond_value # build the granule for conductivity return out_rdt.to_granule()
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) conductivity = rdt['conductivity'] pressure = rdt['pressure'] temperature = rdt['temp'] sal_value = SP_from_cndr(r=conductivity / cte.C3515, t=temperature, p=pressure) log.debug( "Salinity algorithm calculated the sp (practical salinity) values: %s", sal_value) for key, value in rdt.iteritems(): if key in out_rdt: if key == 'conductivity' or key == 'temp' or key == 'pressure': continue out_rdt[key] = value[:] out_rdt['salinity'] = sal_value return out_rdt.to_granule()
def test_granule_publish(self): log.debug("test_granule_publish ") self.loggerpids = [] #retrieve the param dict from the repository pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id) tdom, sdom = time_series_domain() dp_obj = IonObject(RT.DataProduct, name=str(uuid.uuid4()), description='ctd stream test', temporal_domain = tdom.dump(), spatial_domain = sdom.dump()) data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id) # Retrieve the id of the output stream of the out Data Product stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True) log.debug( 'test_granule_publish: Data product streams1 = %s', stream_ids) pid = self.create_logger('ctd_parsed', stream_ids[0] ) self.loggerpids.append(pid) rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) #create the publisher from the stream route stream_route = self.pubsubclient.read_stream_route(stream_ids[0]) publisher = StandaloneStreamPublisher(stream_ids[0], stream_route) # this is one sample from the ctd driver tomato = {"driver_timestamp": 3555971105.1268806, "instrument_id": "ABC-123", "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 22.9304, "value_id": "temp"}, {"value": 51.57381, "value_id": "conductivity"}, {"value": 915.551, "value_id": "pressure"}]} for value in tomato['values']: log.debug("test_granule_publish: Looping tomato values key: %s val: %s ", str(value['value']), str(value['value_id'])) if value['value_id'] in rdt: rdt[value['value_id']] = numpy.array( [ value['value'] ] ) log.debug("test_granule_publish: Added data item %s val: %s ", str(value['value']), str(value['value_id']) ) g = rdt.to_granule() publisher.publish(g) gevent.sleep(3) for pid in self.loggerpids: self.processdispatchclient.cancel_process(pid) #-------------------------------------------------------------------------------- # Cleanup data products #-------------------------------------------------------------------------------- dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True) for dp_id in dp_ids: self.dataproductclient.delete_data_product(dp_id)
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params['stream_def_id']) # Fill the time values out_rdt['time'] = rdt['time'] # The calibration coefficients temp_calibration_coeffs = params['calibration_coeffs']['temp_calibration_coeffs'] pres_calibration_coeffs = params['calibration_coeffs']['pres_calibration_coeffs'] cond_calibration_coeffs = params['calibration_coeffs']['cond_calibration_coeffs'] log.debug("params['calibration_coeffs']: %s", params['calibration_coeffs']) # Set the temperature values for the output granule out_rdt = CTDBP_L1_TransformAlgorithm.calculate_temperature( input_rdt=rdt, out_rdt=out_rdt, temp_calibration_coeffs=temp_calibration_coeffs) # Set the pressure values for the output granule out_rdt = CTDBP_L1_TransformAlgorithm.calculate_pressure( input_rdt=rdt, out_rdt=out_rdt, pres_calibration_coeffs=pres_calibration_coeffs) # Set the conductivity values for the output granule # Note that since the conductivity calculation depends on whether TEMPWAT_L1 and PRESWAT_L1 have been calculated, we need to do this last out_rdt = CTDBP_L1_TransformAlgorithm.calculate_conductivity( input_rdt=rdt, out_rdt=out_rdt, cond_calibration_coeffs=cond_calibration_coeffs) # build the granule for the L1 stream return out_rdt.to_granule()
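The execute() above pulls everything it needs out of params. A hypothetical sketch of that structure, derived only from the keys the function reads; the id and coefficient entries are placeholders, not real values:

# Hypothetical params layout for the CTDBP L1 execute() above.
params = {
    'stream_def_id': 'hypothetical_l1_stream_def_id',  # placeholder, not a real resource id
    'calibration_coeffs': {
        # Placeholders only: the real structure of each entry is whatever the
        # CTDBP_L1_TransformAlgorithm.calculate_* helpers expect.
        'temp_calibration_coeffs': None,
        'pres_calibration_coeffs': None,
        'cond_calibration_coeffs': None,
    },
}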
def recv_packet(self, packet, stream_route, stream_id): if packet == {}: return l0_values = RecordDictionaryTool.load_from_granule(packet) l1_values = RecordDictionaryTool( stream_definition_id=self.stream_definition_id) log.debug( "CTDBP L1 transform using L0 values: temperature %s, pressure %s, conductivity %s", l0_values['temperature'], l0_values['pressure'], l0_values['conductivity']) #for key, value in 'lat', 'lon', 'time', ...: <-- do we want to be a little more specific here? for key, value in l0_values.iteritems(): if key in l1_values: l1_values[key] = value[:] l1_values['temp'] = self.calculate_temperature(l0=l0_values) l1_values['pressure'] = self.calculate_pressure(l0=l0_values) l1_values['conductivity'] = self.calculate_conductivity(l0=l0_values, l1=l1_values) log.debug( 'calculated L1 values: temp %s, pressure %s, conductivity %s', l1_values['temp'], l1_values['pressure'], l1_values['conductivity']) self.publisher.publish(msg=l1_values.to_granule())
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) conductivity = rdt["conductivity"] pressure = rdt["pressure"] temperature = rdt["temp"] longitude = rdt["lon"] if rdt["lon"] is not None else 0 latitude = rdt["lat"] if rdt["lat"] is not None else 0 sp = SP_from_cndr(r=conductivity / cte.C3515, t=temperature, p=pressure) log.debug("Density algorithm calculated the sp (practical salinity) values: %s", sp) sa = SA_from_SP(sp, pressure, longitude, latitude) log.debug("Density algorithm calculated the sa (actual salinity) values: %s", sa) dens_value = rho(sa, temperature, pressure) for key, value in rdt.iteritems(): if key in out_rdt: if key == "conductivity" or key == "temp" or key == "pressure": continue out_rdt[key] = value[:] out_rdt["density"] = dens_value log.debug("Density algorithm returning density values: %s", out_rdt["density"]) return out_rdt.to_granule()
def _get_data(cls, config): """ Iterable function that acquires data from the external dataset referenced in config, slices it by config['constraints']['temporal_slice'] and yields well-formed Granule objects in chunks of up to config['max_records'] records @param config Dict of configuration parameters - must contain 'external_dataset_res' and 'dataset_object' """ ext_dset_res = get_safe(config, 'external_dataset_res', None) # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle) ds = get_safe(config, 'dataset_object') if ext_dset_res and ds: t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension'] x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension'] y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension'] z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension'] var_lst = ext_dset_res.dataset_description.parameters['variables'] t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1))) #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints if isinstance(t_slice, str): t_slice = eval(t_slice) lon = ds.variables[x_vname][:] lat = ds.variables[y_vname][:] z = ds.variables[z_vname][:] t_arr = ds.variables[t_vname][t_slice] data_arrays = {} for varn in var_lst: data_arrays[varn] = ds.variables[varn][t_slice] max_rec = get_safe(config, 'max_records', 1) #dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer') stream_def = get_safe(config, 'stream_def') cnt = calculate_iteration_count(t_arr.size, max_rec) for x in xrange(cnt): ta = t_arr[x * max_rec:(x + 1) * max_rec] # Make a 'master' RecDict rdt = RecordDictionaryTool(stream_definition_id=stream_def) # Assign coordinate values to the RecDict rdt[x_vname] = lon rdt[y_vname] = lat rdt[z_vname] = z # Assign data values to the RecDict rdt[t_vname] = ta for key, arr in data_arrays.iteritems(): d = arr[x * max_rec:(x + 1) * max_rec] rdt[key] = d g = rdt.to_granule() yield g ds.close()
def _get_data(cls, config): """ Iterable function that acquires data from a source iteratively based on constraints provided by config Passed into BaseDataHandler._publish_data and iterated to publish samples. @param config dict containing configuration parameters, may include constraints, formatters, etc @retval an iterable that returns well-formed Granule objects on each iteration """ new_flst = get_safe(config, 'constraints.new_files', []) parser_mod = get_safe(config, 'parser_mod', '') parser_cls = get_safe(config, 'parser_cls', '') module = __import__(parser_mod, fromlist=[parser_cls]) classobj = getattr(module, parser_cls) for f in new_flst: try: size = os.stat(f[0]).st_size try: #find the new data check index in config index = -1 for ndc in config['set_new_data_check']: if ndc[0] == f[0]: index = config['set_new_data_check'].index(ndc) break except: log.error('File name not found in attachment') parser = classobj(f[0], f[3]) max_rec = get_safe(config, 'max_records', 1) stream_def = get_safe(config, 'stream_def') while True: particles = parser.get_records(max_count=max_rec) if not particles: break rdt = RecordDictionaryTool(stream_definition_id=stream_def) populate_rdt(rdt, particles) g = rdt.to_granule() # TODO: record files already read for future additions... # #update new data check with the latest file position if 'set_new_data_check' in config and index > -1: # WRONG: should only record this after file finished parsing, # but may not have another yield at that point to trigger update config['set_new_data_check'][index] = (f[0], f[1], f[2], size) yield g # parser.close() except Exception as ex: # TODO: Decide what to do here, raise an exception or carry on log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
def test_granule_publish(self): log.debug("test_granule_publish ") self.loggerpids = [] #retrieve the param dict from the repository pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id) dp_obj = IonObject(RT.DataProduct, name=str(uuid.uuid4()), description='ctd stream test') data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id) # Retrieve the id of the output stream of the out Data Product stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True) log.debug( 'test_granule_publish: Data product streams1 = %s', stream_ids) pid = self.create_logger('ctd_parsed', stream_ids[0] ) self.loggerpids.append(pid) rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) #create the publisher from the stream route stream_route = self.pubsubclient.read_stream_route(stream_ids[0]) publisher = StandaloneStreamPublisher(stream_ids[0], stream_route) # this is one sample from the ctd driver tomato = {"driver_timestamp": 3555971105.1268806, "instrument_id": "ABC-123", "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 22.9304, "value_id": "temp"}, {"value": 51.57381, "value_id": "conductivity"}, {"value": 915.551, "value_id": "pressure"}]} for value in tomato['values']: log.debug("test_granule_publish: Looping tomato values key: %s val: %s ", str(value['value']), str(value['value_id'])) if value['value_id'] in rdt: rdt[value['value_id']] = numpy.array( [ value['value'] ] ) log.debug("test_granule_publish: Added data item %s val: %s ", str(value['value']), str(value['value_id']) ) g = rdt.to_granule() publisher.publish(g) gevent.sleep(3) for pid in self.loggerpids: self.processdispatchclient.cancel_process(pid) #-------------------------------------------------------------------------------- # Cleanup data products #-------------------------------------------------------------------------------- dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True) for dp_id in dp_ids: self.dataproductclient.delete_data_product(dp_id)
def test_stream_ingestion_worker(self): self.start_ingestion_worker() context_ids, time_ctxt = self._create_param_contexts() pdict_id = self.dataset_management_client.create_parameter_dictionary( name='stream_ingestion_pdict', parameter_context_ids=context_ids, temporal_context='ingestion_timestamp') self.addCleanup( self.dataset_management_client.delete_parameter_dictionary, pdict_id) dataset_id = self.dataset_management_client.create_dataset( name='fake_dataset', description='fake_dataset', stream_id=self.stream_id, spatial_domain=self.spatial_dom.dump(), temporal_domain=self.time_dom.dump(), parameter_dictionary_id=pdict_id) self.addCleanup(self.dataset_management_client.delete_dataset, dataset_id) self.cov = self._create_coverage(dataset_id=dataset_id, parameter_dict_id=pdict_id, time_dom=self.time_dom, spatial_dom=self.spatial_dom) self.addCleanup(self.cov.close) rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['conductivity'] = 1 rdt['pressure'] = 2 rdt['salinity'] = 3 self.start_listener(dataset_id) self.publisher.publish(rdt.to_granule()) self.data_modified = Event() self.data_modified.wait(30) cov = self.get_coverage(dataset_id) self.assertIsNotNone(cov.get_parameter_values('raw')) deserializer = IonObjectDeserializer(obj_registry=get_obj_registry()) granule = retrieve_stream(dataset_id) rdt_complex = RecordDictionaryTool.load_from_granule(granule) rdt_complex['raw'] = [ deserializer.deserialize(i) for i in rdt_complex['raw'] ] for gran in rdt_complex['raw']: rdt_new = RecordDictionaryTool.load_from_granule(gran) self.assertIn(1, rdt_new['conductivity']) self.assertIn(2, rdt_new['pressure']) self.assertIn(3, rdt_new['salinity']) cov.close()
def _get_data(cls, config): """ Iterable function that acquires data from a source iteratively based on constraints provided by config Passed into BaseDataHandler._publish_data and iterated to publish samples. @param config dict containing configuration parameters, may include constraints, formatters, etc @retval an iterable that returns well-formed Granule objects on each iteration """ new_flst = get_safe(config, "constraints.new_files", []) parser_mod = get_safe(config, "parser_mod", "") parser_cls = get_safe(config, "parser_cls", "") module = __import__(parser_mod, fromlist=[parser_cls]) classobj = getattr(module, parser_cls) for f in new_flst: try: size = os.stat(f[0]).st_size try: # find the new data check index in config index = -1 for ndc in config["set_new_data_check"]: if ndc[0] == f[0]: index = config["set_new_data_check"].index(ndc) break except: log.error("File name not found in attachment") parser = classobj(f[0], f[3]) max_rec = get_safe(config, "max_records", 1) stream_def = get_safe(config, "stream_def") while True: particles = parser.get_records(max_count=max_rec) if not particles: break rdt = RecordDictionaryTool(stream_definition_id=stream_def) populate_rdt(rdt, particles) g = rdt.to_granule() # TODO: record files already read for future additions... # #update new data check with the latest file position if "set_new_data_check" in config and index > -1: # WRONG: should only record this after file finished parsing, # but may not have another yield at that point to trigger update config["set_new_data_check"][index] = (f[0], f[1], f[2], size) yield g # parser.close() except Exception as ex: # TODO: Decide what to do here, raise an exception or carry on log.error("Error parsing data file '{0}': {1}".format(f, ex))
def _get_new_ctd_packet(self, stream_definition_id, length): rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) for field in rdt: rdt[field] = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)]) g = rdt.to_granule() return g
def recv_packet(self, msg, stream_route, stream_id): ''' receive packet for ingestion ''' log.debug('received granule for stream %s', stream_id) if msg == {}: log.error('Received empty message from stream: %s', stream_id) return # Message validation if not isinstance(msg, Granule): log.error('Ingestion received a message that is not a granule: %s', msg) return rdt = RecordDictionaryTool.load_from_granule(msg) if rdt is None: log.error('Invalid granule (no RDT) for stream %s', stream_id) return if not len(rdt): log.debug('Empty granule for stream %s', stream_id) return dp_id_list = self.retrieve_dataprocess_for_stream(stream_id) for dp_id in dp_id_list: function, argument_list = self.retrieve_function_and_define_args(dp_id) args = [] rdt = RecordDictionaryTool.load_from_granule(msg) #create the input arguments list #todo: this logic is tied to the example function, generalize for func_param, record_param in argument_list.iteritems(): args.append(rdt[record_param]) try: #run the calc #todo: nothing in the data process resource to specify multi-out map result = function(*args) out_stream_definition, output_parameter = self.retrieve_dp_output_params(dp_id) rdt = RecordDictionaryTool(stream_definition_id=out_stream_definition) publisher = self._publisher_map.get(dp_id, '') rdt[output_parameter] = result if publisher: publisher.publish(rdt.to_granule()) else: log.error('Publisher not found for data process %s', dp_id) self.update_dp_metrics(dp_id) except ImportError: log.error('Error running transform')
def test_event_transform_worker(self): self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # test that a data process (type: data-product-in / event-out) can be defined and launched. # verify that event fields are correctly populated self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] # create the DPD and two DPs self.event_data_process_id = self.create_event_data_processes() # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route ) self.start_event_transform_listener() self.data_modified = Event() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) self.assertTrue(self.event_verified.wait(self.wait_time))
def _get_new_ctd_packet(self, stream_definition_id, length): rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) for field in rdt: rdt[field] = numpy.array( [random.uniform(0.0, 75.0) for i in xrange(length)]) g = rdt.to_granule() return g
def test_transform_worker(self): self.loggerpids = [] self.data_process_objs = [] self._output_stream_ids = [] self.start_transform_worker() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) #retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] #create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription') self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route ) self.start_event_listener() self.dp_list = self.create_data_processes() self.data_modified = Event() self.data_modified.wait(5) rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) self.data_modified.wait(5) # Cleanup processes for pid in self.loggerpids: self.processdispatchclient.cancel_process(pid)
def _get_new_ctd_packet(self, stream_definition_id, length): rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) rdt['time'] = numpy.arange(self.i, self.i+length) for field in rdt: if isinstance(rdt._pdict.get_context(field).param_type, QuantityType): rdt[field] = numpy.array([random.uniform(0.0,75.0) for i in xrange(length)]) g = rdt.to_granule() self.i+=length return g
def _get_new_ctd_L0_packet(self, stream_definition_id, length): rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) rdt['time'] = numpy.arange(self.i, self.i+length) for field in rdt: if isinstance(rdt._pdict.get_context(field).param_type, QuantityType): rdt[field] = numpy.array([random.uniform(0.0,75.0) for i in xrange(length)]) g = rdt.to_granule() self.i+=length return g
def _publish_granules(self, stream_id=None, stream_route=None, values = None,number=None, length=None): pub = StandaloneStreamPublisher(stream_id, stream_route) stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_def._id rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) for i in xrange(number): rdt['input_voltage'] = values rdt['preferred_timestamp'] = numpy.array([random.uniform(0,1000) for l in xrange(length)]) g = rdt.to_granule() pub.publish(g)
def _trigger_func(self, stream_id): self.last_time = 0 parameter_dictionary = self._create_parameter() #@todo - add lots of comments in here while not self.finished.is_set(): length = 10 #Explicitly make these numpy arrays... c = numpy.array( [random.uniform(0.0, 75.0) for i in xrange(length)]) t = numpy.array( [random.uniform(-1.7, 21.0) for i in xrange(length)]) p = numpy.array( [random.lognormvariate(1, 2) for i in xrange(length)]) lat = numpy.array( [random.uniform(-90.0, 90.0) for i in xrange(length)]) lon = numpy.array( [random.uniform(0.0, 360.0) for i in xrange(length)]) tvar = numpy.array( [self.last_time + i for i in xrange(1, length + 1)]) self.last_time = max(tvar) rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary) rdt['temp'] = t # ExampleDataProducer_algorithm.execute(t) rdt['conductivity'] = c # ExampleDataProducer_algorithm.execute(c) rdt['pressure'] = p # ExampleDataProducer_algorithm.execute(p) rdt['time'] = tvar rdt['lat'] = lat rdt['lon'] = lon log.info("logging published Record Dictionary:\n %s", rdt.pretty_print()) g = rdt.to_granule() log.info('Sending %d values!' % length) if (isinstance(g, Granule)): self.publish(g, stream_id) time.sleep(2.0)
def _trigger_func(self, stream_id): log.debug("SimpleCtdDataProducer:_trigger_func ") parameter_dictionary = get_param_dict('ctd_parsed_param_dict') rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary) # The base SimpleCtdPublisher provides a gevent Event that indicates when the process is being # shut down. We can use a simple pattern here to accomplish both a safe shutdown of this loop # when the process shuts down *AND* the timeout between loops in a very safe/efficient fashion. # # By using this instead of a sleep in the loop itself, we can immediately interrupt this loop when # the process is being shut down instead of having to wait for the sleep to terminate. while not self.finished.wait(timeout=2): length = 10 #Explicitly make these numpy arrays... c = numpy.array( [random.uniform(0.0, 75.0) for i in xrange(length)]) t = numpy.array( [random.uniform(-1.7, 21.0) for i in xrange(length)]) p = numpy.array( [random.lognormvariate(1, 2) for i in xrange(length)]) lat = numpy.array( [random.uniform(-90.0, 90.0) for i in xrange(length)]) lon = numpy.array( [random.uniform(0.0, 360.0) for i in xrange(length)]) h = numpy.array( [random.uniform(0.0, 360.0) for i in xrange(length)]) tvar = numpy.array( [self.last_time + i for i in xrange(1, length + 1)]) self.last_time = max(tvar) rdt['time'] = tvar rdt['lat'] = lat rdt['lon'] = lon rdt['temp'] = t rdt['conductivity'] = c rdt['pressure'] = p g = rdt.to_granule() log.debug('SimpleCtdDataProducer: Sending %d values!' % length) self.publisher.publish(g)
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) pressure = rdt['pressure'] pres_value = (pressure / 100.0) + 0.5 for key, value in rdt.iteritems(): if key in out_rdt: out_rdt[key] = value[:] out_rdt['pressure'] = pres_value return out_rdt.to_granule()
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) temperature = rdt['temp'] temp_value = (temperature / 10000.0) - 10 for key, value in rdt.iteritems(): if key in out_rdt: out_rdt[key] = value[:] out_rdt['temp'] = temp_value return out_rdt.to_granule()
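The simple L0-to-L1 execute() transforms above (conductivity, pressure, temperature) each apply a fixed affine rescaling to the raw values. A standalone numpy sketch of just that arithmetic, with made-up input counts:

import numpy

raw_conductivity = numpy.array([5157381.0])  # made-up raw values for illustration
raw_pressure = numpy.array([91505.1])
raw_temperature = numpy.array([229304.0])

cond_value = (raw_conductivity / 100000.0) - 0.5  # same scaling as the conductivity transform
pres_value = (raw_pressure / 100.0) + 0.5         # same scaling as the pressure transform
temp_value = (raw_temperature / 10000.0) - 10     # same scaling as the temperature transform

print(cond_value)
print(pres_value)
print(temp_value)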
def publish_loop(self): sine_ampl = 2.0 # Amplitude in both directions samples = 60 startTime = time.time() count = samples #something other than zero self.dataset_management = DatasetManagementServiceClient( node=self.container.node) while not self.finished.is_set(): count = time.time() - startTime sine_curr_deg = (count % samples) * 360 / samples c = numpy.array( [sine_ampl * math.sin(math.radians(sine_curr_deg))]) t = numpy.array( [sine_ampl * 2 * math.sin(math.radians(sine_curr_deg + 45))]) p = numpy.array( [sine_ampl * 4 * math.sin(math.radians(sine_curr_deg + 60))]) lat = numpy.array([32.8]) lon = numpy.array([-119.6]) # convert time to ntp time. Standard notation in the system tvar = numpy.array([ntplib.system_to_ntp_time(time.time())]) parameter_dictionary = self._create_parameter() #parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict') rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary) h = numpy.array([random.uniform(0.0, 360.0)]) rdt['time'] = tvar rdt['lat'] = lat rdt['lon'] = lon rdt['temp'] = t rdt['conductivity'] = c rdt['pressure'] = p g = rdt.to_granule(data_producer_id=self.id) log.info('SinusoidalCtdPublisher sending 1 record!') self.publisher.publish(g, self.stream_id) time.sleep(1.0)
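The sample values in publish_loop() are plain trigonometry on elapsed time; a minimal standalone sketch of one sample, without the publisher machinery:

import math
import time

sine_ampl = 2.0  # amplitude in both directions
samples = 60
start_time = time.time()

# One sample, computed exactly as in the loop above.
count = time.time() - start_time
sine_curr_deg = (count % samples) * 360 / samples
c = sine_ampl * math.sin(math.radians(sine_curr_deg))
t = sine_ampl * 2 * math.sin(math.radians(sine_curr_deg + 45))
p = sine_ampl * 4 * math.sin(math.radians(sine_curr_deg + 60))
print(c)
print(t)
print(p)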
def _build_granule(stream_definition_id=None, field_names=None, values=None): """ Builds a granule with values corresponding only to the params specified in the field names @param param_dictionary ParameterDictionary @param field_name str @param value numpy.array @retval Granule """ root_rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) zipped = zip(field_names, values) for k,v in zipped: root_rdt[k] = v return root_rdt.to_granule()
def execute(input=None, context=None, config=None, params=None, state=None): """ Dependencies ------------ CONDWAT_L1, TEMPWAT_L1, PRESWAT_L1 Algorithms used --------------- PRACSAL = gsw_SP_from_C((CONDWAT_L1 * 10),TEMPWAT_L1,PRESWAT_L1) Reference --------- The calculations below are based on the following spreadsheet document: https://docs.google.com/spreadsheet/ccc?key=0Au7PUzWoCKU4dDRMeVI0RU9yY180Z0Y5U0hyMUZERmc#gid=0 """ rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) out_rdt['time'] = rdt['time'] conductivity = rdt['conductivity'] pressure = rdt['pressure'] temperature = rdt['temp'] sal_value = gsw.sp_from_c(conductivity * 10, temperature, pressure) log.debug( "CTDBP Salinity algorithm calculated the sp (practical salinity) values: %s", sal_value) for key, value in rdt.iteritems(): if key in out_rdt: if key == 'conductivity' or key == 'temp' or key == 'pressure': continue out_rdt[key] = value[:] out_rdt['salinity'] = sal_value return out_rdt.to_granule()
def _build_granule(stream_definition_id=None, field_name='', value=None, time=None): ''' @param param_dictionary ParameterDictionary @param field_name str @param value numpy.array @retval Granule ''' root_rdt = RecordDictionaryTool( stream_definition_id=stream_definition_id) root_rdt[field_name] = value root_rdt['time'] = time return root_rdt.to_granule()
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) conductivity = rdt['conductivity'] cond_value = (conductivity / 100000.0) - 0.5 for key, value in rdt.iteritems(): if key in out_rdt: out_rdt[key] = value[:] # Update the conductivity values out_rdt['conductivity'] = cond_value # build the granule for conductivity return out_rdt.to_granule()
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) conductivity = rdt['conductivity'] pressure = rdt['pressure'] temperature = rdt['temp'] sal_value = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure) for key, value in rdt.iteritems(): if key in out_rdt: out_rdt[key] = value[:] out_rdt['salinity'] = sal_value return out_rdt.to_granule()
def poller_callback(self, file_like_object, state_memento): log.debug('poller found data to parse') try: config = self.config['parser'] parser = self._create_plugin(config, kwargs=dict(open_file=file_like_object, parse_after=self.latest_granule_time)) records = parser.get_records(max_count=self.max_records) log.trace('have %d records', len(records)) while records: # secretly uses pubsub client rdt = RecordDictionaryTool(param_dictionary=self.parameter_dictionary) for key in records[0]: #assume all dict records have same keys rdt[key] = [ record[key] for record in records ] g = rdt.to_granule() self.publisher.publish(g) records = parser.get_records(max_count=self.max_records) self._set_state('poller_state', state_memento) except: log.error('error handling data', exc_info=True)
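poller_callback() pivots the parser's list of per-record dicts into one column per parameter before assigning them to the RecordDictionaryTool. A small standalone sketch of that pivot with made-up records (like the loop above, it assumes every record has the same keys):

# Made-up parsed records; the real ones come from parser.get_records().
records = [
    {'time': 3600.0, 'temp': 10.1, 'conductivity': 3.2},
    {'time': 3601.0, 'temp': 10.3, 'conductivity': 3.3},
]

# One list per parameter, keyed the same way rdt[key] is filled above.
columns = dict((key, [record[key] for record in records]) for key in records[0])
print(columns)  # {'time': [3600.0, 3601.0], 'temp': [10.1, 10.3], 'conductivity': [3.2, 3.3]}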
def _trigger_func(self, stream_id): self.last_time = 0 parameter_dictionary = self._create_parameter() #@todo - add lots of comments in here while not self.finished.is_set(): length = 10 #Explicitly make these numpy arrays... c = numpy.array([random.uniform(0.0,75.0) for i in xrange(length)]) t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)]) p = numpy.array([random.lognormvariate(1,2) for i in xrange(length)]) lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)]) lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) tvar = numpy.array([self.last_time + i for i in xrange(1,length+1)]) self.last_time = max(tvar) rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary) rdt['temp'] = t # ExampleDataProducer_algorithm.execute(t) rdt['conductivity'] = c # ExampleDataProducer_algorithm.execute(c) rdt['pressure'] = p # ExampleDataProducer_algorithm.execute(p) rdt['time'] = tvar rdt['lat'] = lat rdt['lon'] = lon log.info("logging published Record Dictionary:\n %s", rdt.pretty_print()) g = rdt.to_granule() log.info('Sending %d values!' % length) if(isinstance(g, Granule)): self.publish(g, stream_id) time.sleep(2.0)
def _get_data(cls, config): """ Iterable function that acquires data from a source iteratively based on constraints provided by config Passed into BaseDataHandler._publish_data and iterated to publish samples. @param config dict containing configuration parameters, may include constraints, formatters, etc @retval an iterable that returns well-formed Granule objects on each iteration """ new_flst = get_safe(config, 'constraints.new_files', []) hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE) for f in new_flst: try: parser = SlocumParser(f[0], hdr_cnt) #CBM: Not in use yet... # ext_dset_res = get_safe(config, 'external_dataset_res', None) # t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension'] # x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension'] # y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension'] # z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension'] # var_lst = ext_dset_res.dataset_description.parameters['variables'] max_rec = get_safe(config, 'max_records', 1) dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer') stream_def = get_safe(config, 'stream_def') cnt = calculate_iteration_count( len(parser.data_map[parser.data_map.keys()[0]]), max_rec) for x in xrange(cnt): #rdt = RecordDictionaryTool(taxonomy=ttool) rdt = RecordDictionaryTool(stream_definition_id=stream_def) for name in parser.sensor_map: d = parser.data_map[name][x * max_rec:(x + 1) * max_rec] rdt[name] = d #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt) g = rdt.to_granule() yield g except SlocumParseException: # TODO: Decide what to do here, raise an exception or carry on log.error('Error parsing data file: \'{0}\''.format(f))
def _trigger_func(self, stream_id): log.debug("SimpleCtdDataProducer:_trigger_func ") parameter_dictionary = get_param_dict("ctd_parsed_param_dict") rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary) # The base SimpleCtdPublisher provides a gevent Event that indicates when the process is being # shut down. We can use a simple pattern here to accomplish both a safe shutdown of this loop # when the process shuts down *AND* the timeout between loops in a very safe/efficient fashion. # # By using this instead of a sleep in the loop itself, we can immediately interrupt this loop when # the process is being shut down instead of having to wait for the sleep to terminate. while not self.finished.wait(timeout=2): length = 10 # Explicitly make these numpy arrays... c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)]) t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)]) p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)]) lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)]) lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) tvar = numpy.array([self.last_time + i for i in xrange(1, length + 1)]) self.last_time = max(tvar) rdt["time"] = tvar rdt["lat"] = lat rdt["lon"] = lon rdt["temp"] = t rdt["conductivity"] = c rdt["pressure"] = p g = rdt.to_granule() log.debug("SimpleCtdDataProducer: Sending %d values!" % length) self.publisher.publish(g)
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) absolute_pressure = rdt['absolute_pressure'] for key, value in rdt.iteritems(): cond = key=='time' or key=='port_timestamp' or key=='driver_timestamp'\ or key=='internal_timestamp' or key=='preferred_timestamp' or key=='timestamp'\ or key=='lat' or key=='lon' if cond and key in out_rdt: out_rdt[key] = value[:] out_rdt['seafloor_pressure'] = absolute_pressure * 0.689475728 return out_rdt.to_granule()
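The constant 0.689475728 in the transform above is the PSI-to-decibar conversion factor (1 psi is about 0.6894757 dbar). The conversion on its own:

import numpy

absolute_pressure_psi = numpy.array([14.7, 1000.0])  # made-up values in PSI
seafloor_pressure_dbar = absolute_pressure_psi * 0.689475728
print(seafloor_pressure_dbar)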
def execute(input=None, context=None, config=None, params=None, state=None): stream_def_id = params rdt = RecordDictionaryTool.load_from_granule(input) salinity = get_safe(rdt, "salinity") longitude = get_safe(rdt, "lon") latitude = get_safe(rdt, "lat") time = get_safe(rdt, "time") root_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) root_rdt["salinity"] = 2 * salinity root_rdt["time"] = time root_rdt["lat"] = latitude root_rdt["lon"] = longitude g = root_rdt.to_granule() return g
def execute(input=None, context=None, config=None, params=None, state=None): stream_def_id = params rdt = RecordDictionaryTool.load_from_granule(input) salinity = get_safe(rdt, 'salinity') longitude = get_safe(rdt, 'lon') latitude = get_safe(rdt, 'lat') time = get_safe(rdt, 'time') root_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) root_rdt['salinity'] = 2 * salinity root_rdt['time'] = time root_rdt['lat'] = latitude root_rdt['lon'] = longitude g = root_rdt.to_granule() return g
def publish_loop(self): sine_ampl = 2.0 # Amplitude in both directions samples = 60 startTime = time.time() count = samples #something other than zero self.dataset_management = DatasetManagementServiceClient(node=self.container.node) while not self.finished.is_set(): count = time.time() - startTime sine_curr_deg = (count % samples) * 360 / samples c = numpy.array( [sine_ampl * math.sin(math.radians(sine_curr_deg))] ) t = numpy.array( [sine_ampl * 2 * math.sin(math.radians(sine_curr_deg + 45))] ) p = numpy.array( [sine_ampl * 4 * math.sin(math.radians(sine_curr_deg + 60))] ) lat = numpy.array([32.8]) lon = numpy.array([-119.6]) # convert time to ntp time. Standard notation in the system tvar = numpy.array([ntplib.system_to_ntp_time(time.time())]) parameter_dictionary = self._create_parameter() #parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict') rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary) h = numpy.array([random.uniform(0.0, 360.0)]) rdt['time'] = tvar rdt['lat'] = lat rdt['lon'] = lon rdt['temp'] = t rdt['conductivity'] = c rdt['pressure'] = p g = rdt.to_granule(data_producer_id=self.id) log.info('SinusoidalCtdPublisher sending 1 record!') self.publisher.publish(g, self.stream_id) time.sleep(1.0)
def _publish_granules(self, stream_id=None, stream_route=None, values = None,number=None): pub = StandaloneStreamPublisher(stream_id, stream_route) stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_def._id rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) times = numpy.array([number for l in xrange(self.length)]) for i in xrange(number): rdt['input_voltage'] = values rdt['preferred_timestamp'] = ['time' for l in xrange(len(times))] rdt['time'] = times g = rdt.to_granule() g.data_producer_id = 'instrument_1' log.debug("granule #%s published by instrument:: %s" % ( number,g)) pub.publish(g)
def recv_packet(self, packet, stream_route, stream_id): if packet == {}: return l0_values = RecordDictionaryTool.load_from_granule(packet) l1_values = RecordDictionaryTool(stream_definition_id=self.stream_definition_id) log.debug("CTDBP L1 transform using L0 values: temperature %s, pressure %s, conductivity %s", l0_values['temperature'], l0_values['pressure'], l0_values['conductivity']) #for key, value in 'lat', 'lon', 'time', ...: <-- do we want to be a little more specific here? for key, value in l0_values.iteritems(): if key in l1_values: l1_values[key] = value[:] l1_values['temp'] = self.calculate_temperature(l0=l0_values) l1_values['pressure'] = self.calculate_pressure(l0=l0_values) l1_values['conductivity'] = self.calculate_conductivity(l0=l0_values, l1=l1_values) log.debug('calculated L1 values: temp %s, pressure %s, conductivity %s', l1_values['temp'], l1_values['pressure'], l1_values['conductivity']) self.publisher.publish(msg=l1_values.to_granule())
def _get_data(cls, config): """ Iterable function that acquires data from a source iteratively based on constraints provided by config Passed into BaseDataHandler._publish_data and iterated to publish samples. @param config dict containing configuration parameters, may include constraints, formatters, etc @retval an iterable that returns well-formed Granule objects on each iteration """ new_flst = get_safe(config, 'constraints.new_files', []) hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE) for f in new_flst: try: parser = SlocumParser(f[0], hdr_cnt) #CBM: Not in use yet... # ext_dset_res = get_safe(config, 'external_dataset_res', None) # t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension'] # x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension'] # y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension'] # z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension'] # var_lst = ext_dset_res.dataset_description.parameters['variables'] max_rec = get_safe(config, 'max_records', 1) dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer') stream_def = get_safe(config, 'stream_def') cnt = calculate_iteration_count(len(parser.data_map[parser.data_map.keys()[0]]), max_rec) for x in xrange(cnt): #rdt = RecordDictionaryTool(taxonomy=ttool) rdt = RecordDictionaryTool(stream_definition_id=stream_def) for name in parser.sensor_map: d = parser.data_map[name][x * max_rec:(x + 1) * max_rec] rdt[name] = d #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt) g = rdt.to_granule() yield g except SlocumParseException: # TODO: Decide what to do here, raise an exception or carry on log.error('Error parsing data file: \'{0}\''.format(f))
def execute(input=None, context=None, config=None, params=None, state=None): """ Dependencies ------------ CONDWAT_L1, TEMPWAT_L1, PRESWAT_L1 Algorithms used --------------- PRACSAL = gsw_SP_from_C((CONDWAT_L1 * 10),TEMPWAT_L1,PRESWAT_L1) Reference --------- The calculations below are based on the following spreadsheet document: https://docs.google.com/spreadsheet/ccc?key=0Au7PUzWoCKU4dDRMeVI0RU9yY180Z0Y5U0hyMUZERmc#gid=0 """ rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) out_rdt['time'] = rdt['time'] conductivity = rdt['conductivity'] pressure = rdt['pressure'] temperature = rdt['temp'] sal_value = gsw.sp_from_c(conductivity * 10, temperature, pressure) log.debug("CTDBP Salinity algorithm calculated the sp (practical salinity) values: %s", sal_value) for key, value in rdt.iteritems(): if key in out_rdt: if key=='conductivity' or key=='temp' or key=='pressure': continue out_rdt[key] = value[:] out_rdt['salinity'] = sal_value return out_rdt.to_granule()
def _get_data(cls, config): """ Retrieves a random sample array of length config['constraints']['array_len'] and yields it in chunks of up to config['max_records'] records @param config Dict of configuration parameters - must contain ['constraints']['array_len'] """ array_len = get_safe(config, 'constraints.array_len', 1) max_rec = get_safe(config, 'max_records', 1) #dprod_id = get_safe(config, 'data_producer_id') stream_def = get_safe(config, 'stream_def') arr = npr.random_sample(array_len) #log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr)) cnt = calculate_iteration_count(arr.size, max_rec) for x in xrange(cnt): rdt = RecordDictionaryTool(stream_definition_id=stream_def) d = arr[x * max_rec:(x + 1) * max_rec] rdt['dummy'] = d g = rdt.to_granule() yield g
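The chunking above leans on calculate_iteration_count; a standalone sketch of the slicing pattern, assuming that helper amounts to a ceiling division (an assumption - the real implementation lives elsewhere in this codebase):

import numpy

def iteration_count(total, max_rec):
    # Assumed equivalent of calculate_iteration_count: how many max_rec-sized chunks cover total.
    return (total + max_rec - 1) // max_rec

arr = numpy.random.random_sample(10)  # stand-in for npr.random_sample(array_len)
max_rec = 4
for x in range(iteration_count(arr.size, max_rec)):
    chunk = arr[x * max_rec:(x + 1) * max_rec]  # same slicing as the loop above
    print(chunk)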
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) conductivity = rdt['conductivity'] pressure = rdt['pressure'] temperature = rdt['temp'] sal_value = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure) log.debug("Salinity algorithm calculated the sp (practical salinity) values: %s", sal_value) for key, value in rdt.iteritems(): if key in out_rdt: if key=='conductivity' or key=='temp' or key=='pressure': continue out_rdt[key] = value[:] out_rdt['salinity'] = sal_value return out_rdt.to_granule()
def execute(input=None, context=None, config=None, params=None, state=None): rdt = RecordDictionaryTool.load_from_granule(input) out_rdt = RecordDictionaryTool(stream_definition_id=params) absolute_pressure = rdt['absolute_pressure'] for key, value in rdt.iteritems(): cond = key=='time' or key=='port_timestamp' or key=='driver_timestamp'\ or key=='internal_timestamp' or key=='preferred_timestamp' or key=='timestamp'\ or key=='lat' or key=='lon' if cond and key in out_rdt: out_rdt[key] = value[:] out_rdt['absolute_pressure'] = absolute_pressure return out_rdt.to_granule()
def _get_data(cls, config): """ A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers @param config Dict of configuration parameters - must contain ['constraints']['count'] """ cnt = get_safe(config, 'constraints.count', 1) max_rec = get_safe(config, 'max_records', 1) #dprod_id = get_safe(config, 'data_producer_id') stream_def = get_safe(config, 'stream_def') def fibGenerator(): """ A Fibonacci sequence generator """ count = 0 ret = [] a, b = 1, 1 while 1: count += 1 ret.append(a) if count == max_rec: yield np.array(ret) ret = [] count = 0 a, b = b, a + b gen = fibGenerator() cnt = calculate_iteration_count(cnt, max_rec) for i in xrange(cnt): rdt = RecordDictionaryTool(stream_definition_id=stream_def) d = gen.next() rdt['data'] = d g = rdt.to_granule() yield g
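A standalone, runnable version of the batched Fibonacci generator used above, stripped of the RecordDictionaryTool/granule plumbing:

import numpy as np

def fib_batches(max_rec):
    """Yield numpy arrays of max_rec sequential Fibonacci numbers, mirroring fibGenerator() above."""
    count, ret = 0, []
    a, b = 1, 1
    while True:
        count += 1
        ret.append(a)
        if count == max_rec:
            yield np.array(ret)
            ret, count = [], 0
        a, b = b, a + b

gen = fib_batches(5)
print(next(gen))  # [1 1 2 3 5]
print(next(gen))  # [ 8 13 21 34 55]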
def test_two_transforms_inline(self): self.dp_list = [] self.event1_verified = Event() self.event2_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_one', description='input test stream one') self.input_dp_one_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) dpd_id = self.create_data_process_definition() dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products( ) first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id) second_dp_id = self.create_data_process_two(dpd_id, dp1_func_output_dp_id, dp2_func_output_dp_id) #retrieve subscription from data process one subscription_objs, _ = self.rrclient.find_objects( subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #retrieve the Stream for these data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_one_id, PRED.hasStream, RT.Stream, True) self.stream_one_id = stream_ids[0] #the input to data process two is the output from data process one stream_ids, assoc_ids = self.rrclient.find_objects( dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True) self.stream_two_id = stream_ids[0] # Run provenance on the output dataproduct of the second data process to see all the links # are as expected output_data_product_provenance = self.dataproductclient.get_data_product_provenance( dp2_func_output_dp_id) # Do a basic check to see if there were 2 entries in the provenance graph. Parent and Child. 
self.assertTrue(len(output_data_product_provenance) == 3) # confirm that the linking from the output dataproduct to input dataproduct is correct self.assertTrue( dp1_func_output_dp_id in output_data_product_provenance[dp2_func_output_dp_id]['parents']) self.assertTrue( self.input_dp_one_id in output_data_product_provenance[dp1_func_output_dp_id]['parents']) #create subscription to stream ONE, create data process and publish granule on stream ONE #create a queue to catch the published granules of stream ONE subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_one_id, self.stream_two_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) stream_route_one = self.pubsub_client.read_stream_route( self.stream_one_id) self.publisher_one = StandaloneStreamPublisher( stream_id=self.stream_one_id, stream_route=stream_route_one) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #data process 1 adds conductivity + pressure and puts the result in salinity #data process 2 adds salinity + pressure and puts the result in conductivity self.start_event_listener() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) self.assertTrue(self.event2_verified.wait(self.wait_time)) self.assertTrue(self.event1_verified.wait(self.wait_time))
def test_multi_subscriptions(self):
    self.dp_list = []
    self.event1_verified = Event()
    self.event2_verified = Event()

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
        name='ctd_parsed_param_dict', id_only=True)

    # create the StreamDefinition
    self.stream_def_id = self.pubsub_client.create_stream_definition(
        name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

    # create the first input DataProduct
    input_dp_obj = IonObject(RT.DataProduct,
                             name='input_data_product_one',
                             description='input test stream one')
    self.input_dp_one_id = self.dataproductclient.create_data_product(
        data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    # create the second input DataProduct
    input_dp_obj = IonObject(RT.DataProduct,
                             name='input_data_product_two',
                             description='input test stream two')
    self.input_dp_two_id = self.dataproductclient.create_data_product(
        data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    # retrieve the Stream for each input data product
    stream_ids, assoc_ids = self.rrclient.find_objects(
        self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
    self.stream_one_id = stream_ids[0]

    stream_ids, assoc_ids = self.rrclient.find_objects(
        self.input_dp_two_id, PRED.hasStream, RT.Stream, True)
    self.stream_two_id = stream_ids[0]

    dpd_id = self.create_data_process_definition()
    dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
    first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id)
    second_dp_id = self.create_data_process_two(dpd_id, self.input_dp_two_id, dp2_func_output_dp_id)

    # retrieve the subscription from data process one
    subscription_objs, _ = self.rrclient.find_objects(
        subject=first_dp_id, predicate=PRED.hasSubscription,
        object_type=RT.Subscription, id_only=False)
    log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0])

    # create a subscription (queue) to catch the granules published on stream ONE
    self.subscription_one_id = self.pubsub_client.create_subscription(
        name='parsed_subscription_one',
        stream_ids=[self.stream_one_id],
        exchange_name=subscription_objs[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_one_id)

    self.pubsub_client.activate_subscription(self.subscription_one_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_one_id)

    stream_route_one = self.pubsub_client.read_stream_route(self.stream_one_id)
    self.publisher_one = StandaloneStreamPublisher(
        stream_id=self.stream_one_id, stream_route=stream_route_one)

    self.start_event_listener()

    # data process 1 adds conductivity + pressure and puts the result in salinity
    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = [0]  # time should always come first
    rdt['conductivity'] = [1]
    rdt['pressure'] = [2]
    rdt['salinity'] = [8]

    self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id)

    # retrieve the subscription from data process two
    subscription_objs, _ = self.rrclient.find_objects(
        subject=second_dp_id, predicate=PRED.hasSubscription,
        object_type=RT.Subscription, id_only=False)
    log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0])

    # create a subscription (queue) to catch the granules published on stream TWO
    self.subscription_two_id = self.pubsub_client.create_subscription(
        name='parsed_subscription_one_two',
        stream_ids=[self.stream_two_id],
        exchange_name=subscription_objs[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_two_id)

    self.pubsub_client.activate_subscription(self.subscription_two_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_two_id)

    stream_route_two = self.pubsub_client.read_stream_route(self.stream_two_id)
    self.publisher_two = StandaloneStreamPublisher(
        stream_id=self.stream_two_id, stream_route=stream_route_two)

    # data process 1 adds conductivity + pressure and puts the result in salinity
    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = [0]  # time should always come first
    rdt['conductivity'] = [1]
    rdt['pressure'] = [2]
    rdt['salinity'] = [8]

    self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id)

    # data process 2 adds salinity + pressure and puts the result in conductivity
    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = [0]  # time should always come first
    rdt['conductivity'] = [22]
    rdt['pressure'] = [4]
    rdt['salinity'] = [1]

    self.publisher_two.publish(msg=rdt.to_granule(), stream_id=self.stream_two_id)

    self.assertTrue(self.event2_verified.wait(self.wait_time))
    self.assertTrue(self.event1_verified.wait(self.wait_time))
def test_event_transform_worker(self):
    self.data_process_objs = []
    self._output_stream_ids = []
    self.event_verified = Event()

    # test that a data process (type: data-product-in / event-out) can be defined and launched.
    # verify that event fields are correctly populated

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
        name='ctd_parsed_param_dict', id_only=True)

    # create the StreamDefinition
    self.stream_def_id = self.pubsub_client.create_stream_definition(
        name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

    # create the DataProduct
    input_dp_obj = IonObject(RT.DataProduct,
                             name='input_data_product',
                             description='input test stream')
    self.input_dp_id = self.dataproductclient.create_data_product(
        data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    # retrieve the Stream for this data product
    stream_ids, assoc_ids = self.rrclient.find_objects(
        self.input_dp_id, PRED.hasStream, RT.Stream, True)
    self.stream_id = stream_ids[0]

    # create the DPD and two DPs
    self.event_data_process_id = self.create_event_data_processes()

    # retrieve subscription from data process
    subscription_objs, _ = self.rrclient.find_objects(
        subject=self.event_data_process_id, predicate=PRED.hasSubscription,
        object_type=RT.Subscription, id_only=False)
    log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0])

    # create a queue to catch the published granules
    self.subscription_id = self.pubsub_client.create_subscription(
        name='parsed_subscription',
        stream_ids=[self.stream_id],
        exchange_name=subscription_objs[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

    self.pubsub_client.activate_subscription(self.subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

    stream_route = self.pubsub_client.read_stream_route(self.stream_id)
    self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                               stream_route=stream_route)

    self.start_event_transform_listener()

    self.data_modified = Event()

    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = [0]  # time should always come first
    rdt['conductivity'] = [1]
    rdt['pressure'] = [2]
    rdt['salinity'] = [8]

    self.publisher.publish(rdt.to_granule())

    self.assertTrue(self.event_verified.wait(self.wait_time))
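# The event-out data process exercised above is only driven end to end; its transform
# code is not part of this module. The sketch below is a fully hypothetical
# data-product-in / event-out style function: the 'event_callback' hook read from config
# is an assumption made for illustration, not an API of the framework shown here.
def alert_on_high_conductivity(input=None, context=None, config=None, params=None, state=None):
    """Hypothetical transform that raises an alert instead of producing a granule."""
    rdt = RecordDictionaryTool.load_from_granule(input)
    conductivity = rdt['conductivity']

    config = config or {}
    threshold = config.get('threshold', 10.0)
    event_callback = config.get('event_callback')

    # report values above the threshold through the assumed callback
    if event_callback is not None and max(conductivity) > threshold:
        event_callback(origin=params,
                       description='conductivity above %s' % threshold)

    # event-out transforms do not return an output granule
    return None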
def test_transform_worker(self):
    # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
    # verify that the output granule fields are correctly populated
    # test that the input and output data products are linked to facilitate provenance

    self.dp_list = []
    self.data_process_objs = []
    self._output_stream_ids = []
    self.granule_verified = Event()
    self.worker_assigned_event_verified = Event()
    self.dp_created_event_verified = Event()
    self.heartbeat_event_verified = Event()

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
        name='ctd_parsed_param_dict', id_only=True)

    # create the StreamDefinition
    self.stream_def_id = self.pubsub_client.create_stream_definition(
        name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

    # create the DataProduct that is the input to the data processes
    input_dp_obj = IonObject(RT.DataProduct,
                             name='input_data_product',
                             description='input test stream')
    self.input_dp_id = self.dataproductclient.create_data_product(
        data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    # retrieve the Stream for this data product
    stream_ids, assoc_ids = self.rrclient.find_objects(
        self.input_dp_id, PRED.hasStream, RT.Stream, True)
    self.stream_id = stream_ids[0]

    self.start_event_listener()

    # create the DPD, DataProcess and output DataProduct
    dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
    self.dp_list.append(dataprocess_id)

    # validate that the repository for data product algorithms persists the new resources (NEW SA-1);
    # the create_data_process call created one of each
    dpd_ids, _ = self.rrclient.find_resources(restype=OT.DataProcessDefinition, id_only=False)
    # there will be more than one because of the DPDs that represent the PFs in the data product above
    self.assertTrue(dpd_ids is not None)
    dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False)
    # only one DP, because the PFs in the data product above are not activated yet
    self.assertEquals(len(dp_ids), 1)

    # validate the name and version label (NEW SA-2)
    dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(dataprocessdef_id)
    self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
    self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

    # validate that the DPD has an attachment (NEW SA-21)
    attachment_ids, assoc_ids = self.rrclient.find_objects(
        dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
    self.assertEqual(len(attachment_ids), 1)
    attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
    log.debug('attachment: %s', attachment_obj)

    # validate that the data process resource has input and output data products associated
    # (L4-CI-SA-RQ-364 and NEW SA-3)
    outproduct_ids, assoc_ids = self.rrclient.find_objects(
        dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
    self.assertEqual(len(outproduct_ids), 1)
    inproduct_ids, assoc_ids = self.rrclient.find_objects(
        dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
    self.assertEqual(len(inproduct_ids), 1)

    # Test for provenance: get the data product produced by the data process
    output_data_product_id, _ = self.rrclient.find_objects(
        subject=dataprocess_id, object_type=RT.DataProduct,
        predicate=PRED.hasOutputProduct, id_only=True)

    output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
        output_data_product_id[0])

    # Basic check: the provenance graph should have 2 entries -- the parent and the child --
    # with the DataProcessDefinition that created the child recorded on the link.
    self.assertTrue(len(output_data_product_provenance) == 2)
    self.assertTrue(self.input_dp_id in
                    output_data_product_provenance[output_data_product_id[0]]['parents'])
    self.assertTrue(output_data_product_provenance[output_data_product_id[0]]
                    ['parents'][self.input_dp_id]['data_process_definition_id'] == dataprocessdef_id)

    # NEW SA-4 | Data processing shall include the appropriate data product algorithm name and
    # version number in the metadata of each output data product created by the data product algorithm.
    output_data_product_obj, _ = self.rrclient.find_objects(
        subject=dataprocess_id, object_type=RT.DataProduct,
        predicate=PRED.hasOutputProduct, id_only=False)
    self.assertIsNotNone(output_data_product_obj[0].name)
    self.assertIsNotNone(output_data_product_obj[0]._rev)

    # retrieve subscription from data process
    subscription_objs, _ = self.rrclient.find_objects(
        subject=dataprocess_id, predicate=PRED.hasSubscription,
        object_type=RT.Subscription, id_only=False)
    log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0])

    # create a queue to catch the published granules
    self.subscription_id = self.pubsub_client.create_subscription(
        name='parsed_subscription',
        stream_ids=[self.stream_id],
        exchange_name=subscription_objs[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

    self.pubsub_client.activate_subscription(self.subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

    stream_route = self.pubsub_client.read_stream_route(self.stream_id)
    self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                               stream_route=stream_route)

    for n in range(1, 101):
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher.publish(rdt.to_granule())

    # validate that the output granule is received and the updated value is correct
    self.assertTrue(self.granule_verified.wait(self.wait_time))

    # validate that the "data process loaded into worker" event is received (L4-CI-SA-RQ-182)
    self.assertTrue(self.worker_assigned_event_verified.wait(self.wait_time))

    # validate that the "data process created" (with data product ids) event is received (NEW SA-42)
    self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

    # validate that the data process heartbeat event is received, one per hundred granules
    # processed (L4-CI-SA-RQ-182); this takes a while, so use a large wait limit
    self.assertTrue(self.heartbeat_event_verified.wait(200))

    # validate that the code of the transform function can be retrieved via inspect_data_process_definition
    src = self.dataprocessclient.inspect_data_process_definition(dataprocessdef_id)
    self.assertIn('def add_arrays(a, b)', src)

    # now delete the DPD and DP, then verify that the resources are retired so that the
    # information required for provenance is still available
    self.dataprocessclient.delete_data_process(dataprocess_id)
    self.dataprocessclient.delete_data_process_definition(dataprocessdef_id)

    in_dp_objs, _ = self.rrclient.find_objects(
        subject=dataprocess_id, predicate=PRED.hasInputProduct,
        object_type=RT.DataProduct, id_only=True)
    self.assertTrue(in_dp_objs is not None)

    dpd_objs, _ = self.rrclient.find_subjects(
        subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess,
        object=dataprocess_id, id_only=True)
    self.assertTrue(dpd_objs is not None)
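# test_transform_worker only checks that the transform source retrieved via
# inspect_data_process_definition contains the signature 'def add_arrays(a, b)'; the body
# of that function is not reproduced in this document. A plausible minimal version,
# consistent with that signature (element-wise addition of two array-like inputs), could
# look like the following -- an assumed implementation, not the registered one:
import numpy

def add_arrays(a, b):
    """Element-wise sum of two array-like inputs (assumed implementation)."""
    return numpy.asarray(a) + numpy.asarray(b)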
def execute(input=None, context=None, config=None, params=None, state=None):
    """
    Dependencies
    ------------
    PRACSAL, PRESWAT_L1, longitude, latitude, TEMPWAT_L1

    Algorithms used
    ---------------
    1. PRACSAL = gsw_SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
    2. absolute_salinity = gsw_SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
    3. conservative_temperature = gsw_CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
    4. DENSITY = gsw_rho(absolute_salinity, conservative_temperature, PRESWAT_L1)

    Reference
    ---------
    The calculations below are based on the following spreadsheet document:
    https://docs.google.com/spreadsheet/ccc?key=0Au7PUzWoCKU4dDRMeVI0RU9yY180Z0Y5U0hyMUZERmc#gid=0
    """
    lat = params['lat']
    lon = params['lon']
    stream_def_id = params['stream_def']

    rdt = RecordDictionaryTool.load_from_granule(input)
    out_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

    out_rdt['time'] = rdt['time']

    conductivity = rdt['conductivity']
    pressure = rdt['pressure']
    temperature = rdt['temp']

    latitude = np.ones(conductivity.shape) * lat
    longitude = np.ones(conductivity.shape) * lon
    log.debug("Using latitude: %s,\n longitude: %s", latitude, longitude)

    # Doing: PRACSAL = gsw_SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
    pracsal = gsw.sp_from_c(conductivity * 10, temperature, pressure)
    log.debug("CTDBP Density algorithm calculated the pracsal (practical salinity) values: %s", pracsal)

    # Doing: absolute_salinity = gsw_SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
    absolute_salinity = gsw.sa_from_sp(pracsal, pressure, longitude, latitude)
    log.debug("CTDBP Density algorithm calculated the absolute_salinity values: %s", absolute_salinity)

    # Doing: conservative_temperature = gsw_CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
    conservative_temperature = gsw.ct_from_t(absolute_salinity, temperature, pressure)
    log.debug("CTDBP Density algorithm calculated the conservative temperature values: %s",
              conservative_temperature)

    # Doing: DENSITY = gsw_rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
    dens_value = gsw.rho(absolute_salinity, conservative_temperature, pressure)
    log.debug("Calculated density values: %s", dens_value)

    # copy everything else across, skipping the inputs consumed above
    for key, value in rdt.iteritems():
        if key in out_rdt:
            if key == 'conductivity' or key == 'temp' or key == 'pressure':
                continue
            out_rdt[key] = value[:]

    out_rdt['density'] = dens_value

    return out_rdt.to_granule()
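# Hedged usage sketch: driving the CTDBP density execute() function directly with a small
# hand-built granule, outside a transform worker. The 'stream_def_id' name, the parameter
# values and the sample numbers are placeholders; in the tests above the stream
# definition id comes from pubsub_client.create_stream_definition().
in_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
in_rdt['time'] = [0]
in_rdt['conductivity'] = [4.2]   # S/m
in_rdt['temp'] = [10.0]          # deg C
in_rdt['pressure'] = [100.0]     # dbar

params = {'lat': 45.0, 'lon': -125.0, 'stream_def': stream_def_id}
out_granule = execute(input=in_rdt.to_granule(), params=params)

out_rdt = RecordDictionaryTool.load_from_granule(out_granule)
log.debug('density: %s', out_rdt['density'])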