def _get_data(cls, config):
    new_flst = get_safe(config, 'constraints.new_files', [])
    hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)
    for f in new_flst:
        try:
            parser = SlocumParser(f[0], hdr_cnt)
            #CBM: Not in use yet...
            #ext_dset_res = get_safe(config, 'external_dataset_res', None)
            #t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            #x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            #y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            #z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            #var_lst = ext_dset_res.dataset_description.parameters['variables']

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            #tx_yml = get_safe(config, 'taxonomy')
            #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
            pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

            cnt = calculate_iteration_count(len(parser.sensor_map), max_rec)
            for x in xrange(cnt):
                #rdt = RecordDictionaryTool(taxonomy=ttool)
                rdt = RecordDictionaryTool(param_dictionary=pdict)

                for name in parser.sensor_map:
                    d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                    rdt[name] = d

                #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                yield g
        except SlocumParseException as spe:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file: \'{0}\''.format(f))
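# --- Added note (not part of the original module): calculate_iteration_count is called
# throughout these handlers but is not defined in this section. A minimal sketch of the
# assumed semantics - ceil-division of the total record count by the per-granule
# maximum - under Python 2 integer division:
def calculate_iteration_count(total_records, max_records):
    cnt = total_records / max_records
    if total_records % max_records > 0:
        cnt += 1
    return cnt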
def _get_new_ctd_packet(self, stream_id, length):
    rdt = RecordDictionaryTool(taxonomy=tx)

    #Explicitly make these numpy arrays...
    c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])
    t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])
    p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)])
    lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])
    lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
    h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
    tvar = numpy.array([self.last_time + i for i in xrange(1, length + 1)])
    self.last_time = max(tvar)

    log.warn('Got time: %s' % str(tvar))
    log.warn('Got t: %s' % str(t))

    rdt['time'] = tvar
    rdt['lat'] = lat
    rdt['lon'] = lon
    rdt['height'] = h
    rdt['temp'] = t
    rdt['cond'] = c
    rdt['pres'] = p

    #rdt['coordinates'] = rdt0
    #rdt['data'] = rdt1

    g = build_granule(data_producer_id=stream_id, taxonomy=tx, record_dictionary=rdt)

    return g
def execute(self, granule):
    """Processes incoming data!!!!
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    #todo: use only flat dicts for now, may change later...
    #rdt0 = rdt['coordinates']
    #rdt1 = rdt['data']

    pressure = get_safe(rdt, 'pres')  #psd.get_values('pressure')
    longitude = get_safe(rdt, 'lon')  #psd.get_values('longitude')
    latitude = get_safe(rdt, 'lat')  #psd.get_values('latitude')
    time = get_safe(rdt, 'time')  #psd.get_values('time')
    height = get_safe(rdt, 'height')  #psd.get_values('height')

    log.warn('Got pressure: %s' % str(pressure))

    # L1 pressure algorithm:
    # 1) The algorithm input is the L0 pressure data product (p_hex) and, in the case of the SBE 37IM,
    #    the pressure range (P_rng) from metadata.
    # 2) Convert the hexadecimal string to a decimal string.
    # 3) For the SBE 37IM only, convert the pressure range (P_rng, input from metadata) from psia to dbar.
    # 4) Perform the scaling operation to produce the L1 pressure data product (in dbar).

    # Use the constructor to put data into a granule
    #psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
    ### Assumes the config argument for output streams is known and there is only one 'output'.
    ### The stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
    ### application level like this!

    scaled_pressure = pressure

    for i in xrange(len(pressure)):
        #todo: get pressure range from metadata (if present) and include in calc
        scaled_pressure[i] = (pressure[i])

    root_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #todo: use only flat dicts for now, may change later...
    #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    root_rdt['pres'] = scaled_pressure
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #root_rdt['coordinates'] = coord_rdt
    #root_rdt['data'] = data_rdt

    #return psc.close_stream_granule()
    return build_granule(data_producer_id='ctd_L1_pressure', taxonomy=self.tx, record_dictionary=root_rdt)
def test_combine_granule(self):
    tt = TaxyTool()
    tt.add_taxonomy_set('a')

    rdt = RecordDictionaryTool(tt)
    rdt['a'] = np.array([1, 2, 3])
    granule1 = build_granule('test', tt, rdt)

    rdt = RecordDictionaryTool(tt)
    rdt['a'] = np.array([4, 5, 6])
    granule2 = build_granule('test', tt, rdt)

    granule3 = combine_granules(granule1, granule2)
    rdt = RecordDictionaryTool.load_from_granule(granule3)
    self.assertTrue(np.allclose(rdt['a'], np.array([1, 2, 3, 4, 5, 6])))
def test_build_granule_and_load_from_granule_with_taxonomy(self):

    #Define a taxonomy and add sets. add_taxonomy_set takes one or more names and assigns them to one handle
    tx = TaxyTool()
    tx.add_taxonomy_set('temp', 'long_temp_name')
    tx.add_taxonomy_set('cond', 'long_cond_name')
    tx.add_taxonomy_set('pres', 'long_pres_name')
    tx.add_taxonomy_set('rdt')
    # map is {<local name>: <granule name or path>}

    #Use RecordDictionaryTool to create a record dictionary. Send in the taxonomy so the Tool knows what to expect
    rdt = RecordDictionaryTool(taxonomy=tx)

    #Create some arrays and fill them with random values
    temp_array = np.random.standard_normal(100)
    cond_array = np.random.standard_normal(100)
    pres_array = np.random.standard_normal(100)

    #Use the RecordDictionaryTool to add the values. This also would work if you used long_temp_name, etc.
    rdt['temp'] = temp_array
    rdt['cond'] = cond_array
    rdt['pres'] = pres_array

    #You can also add in another RecordDictionaryTool, providing the taxonomies are the same.
    rdt2 = RecordDictionaryTool(taxonomy=tx)
    rdt2['temp'] = temp_array
    rdt['rdt'] = rdt2

    g = build_granule(data_producer_id='john', taxonomy=tx, record_dictionary=rdt)

    l_tx = TaxyTool.load_from_granule(g)
    l_rd = RecordDictionaryTool.load_from_granule(g)

    # Make sure we got back the same Taxonomy Object
    self.assertEquals(l_tx._t, tx._t)
    self.assertEquals(l_tx.get_handles('temp'), tx.get_handles('temp'))
    self.assertEquals(l_tx.get_handles('testing_2'), tx.get_handles('testing_2'))

    # Now test the record dictionary object
    self.assertEquals(l_rd._rd, rdt._rd)
    self.assertEquals(l_rd._tx._t, rdt._tx._t)

    for k, v in l_rd.iteritems():
        self.assertIn(k, rdt)
        if isinstance(v, np.ndarray):
            self.assertTrue((v == rdt[k]).all())
        else:
            self.assertEquals(v._rd, rdt[k]._rd)
def execute(self, granule):
    """Processes incoming data!!!!
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    #todo: use only flat dicts for now, may change later...
    #rdt0 = rdt['coordinates']
    #rdt1 = rdt['data']

    temperature = get_safe(rdt, 'temp')
    conductivity = get_safe(rdt, 'cond')
    pressure = get_safe(rdt, 'pres')

    longitude = get_safe(rdt, 'lon')
    latitude = get_safe(rdt, 'lat')
    time = get_safe(rdt, 'time')
    height = get_safe(rdt, 'height')

    log.warn('Got conductivity: %s' % str(conductivity))
    log.warn('Got pressure: %s' % str(pressure))
    log.warn('Got temperature: %s' % str(temperature))

    sp = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)
    sa = SA_from_SP(sp, pressure, longitude, latitude)
    density = rho(sa, temperature, pressure)

    log.warn('Got density: %s' % str(density))

    # Use the constructor to put data into a granule
    #psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
    ### Assumes the config argument for output streams is known and there is only one 'output'.
    ### The stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
    ### application level like this!

    root_rdt = RecordDictionaryTool(param_dictionary=self.dens)
    #todo: use only flat dicts for now, may change later...
    #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    root_rdt['density'] = density
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #root_rdt['coordinates'] = coord_rdt
    #root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L2_density', param_dictionary=self.dens, record_dictionary=root_rdt)
def _trigger_func(self, stream_id):
    log.debug("SimpleCtdDataProducer:_trigger_func")

    rdt = RecordDictionaryTool(taxonomy=tx)
    #rdt0 = RecordDictionaryTool(taxonomy=tx)
    #rdt1 = RecordDictionaryTool(taxonomy=tx)

    #@todo - add lots of comments in here
    while True:
        length = 10

        #Explicitly make these numpy arrays...
        c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])
        t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])
        p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)])
        lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])
        lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
        h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
        tvar = numpy.array([self.last_time + i for i in xrange(1, length + 1)])
        self.last_time = max(tvar)

        rdt['time'] = tvar
        rdt['lat'] = lat
        rdt['lon'] = lon
        rdt['height'] = h
        rdt['temp'] = t
        rdt['cond'] = c
        rdt['pres'] = p

        #todo: use only flat dicts for now, may change later...
        #rdt['coordinates'] = rdt0
        #rdt['data'] = rdt1

        log.debug("SimpleCtdDataProducer: logging published Record Dictionary:\n %s", rdt.pretty_print())

        g = build_granule(data_producer_id=stream_id, taxonomy=tx, record_dictionary=rdt)

        log.debug('SimpleCtdDataProducer: Sending %d values!' % length)
        self.publisher.publish(g)
        time.sleep(2.0)
def execute(self, granule):
    """
    Example process to double the salinity value
    """
    # Use the RecordDictionaryTool to pull data from a granule
    #psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=packet)
    rdt = RecordDictionaryTool.load_from_granule(granule)

    salinity = get_safe(rdt, 'salinity')
    longitude = get_safe(rdt, 'lon')
    latitude = get_safe(rdt, 'lat')
    time = get_safe(rdt, 'time')
    height = get_safe(rdt, 'height')

    ## pull data from a granule
    #psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)
    #
    #longitude = psd.get_values('longitude')
    #latitude = psd.get_values('latitude')
    #height = psd.get_values('height')
    #time = psd.get_values('time')
    #salinity = psd.get_values('salinity')

    salinity *= 2.0

    print('Doubled salinity: %s' % str(salinity))

    # Use the constructor to put data into a granule
    #psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
    #
    #for i in xrange(len(salinity)):
    #    point_id = psc.add_point(time=time[i], location=(longitude[i], latitude[i], height[i]))
    #    psc.add_scalar_point_coverage(point_id=point_id, coverage_id='salinity', value=salinity[i])
    #
    #return psc.close_stream_granule()

    root_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    root_rdt['salinity'] = salinity
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #root_rdt['coordinates'] = coord_rdt
    #root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L2_salinity', taxonomy=self.tx, record_dictionary=root_rdt)
def execute(self, granule):
    """Processes incoming data!!!!
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    #todo: use only flat dicts for now, may change later...
    #rdt0 = rdt['coordinates']
    #rdt1 = rdt['data']

    temperature = get_safe(rdt, 'temp')

    longitude = get_safe(rdt, 'lon')
    latitude = get_safe(rdt, 'lat')
    time = get_safe(rdt, 'time')
    height = get_safe(rdt, 'height')

    log.warn('Got temperature: %s' % str(temperature))

    # The L1 temperature data product algorithm takes the L0 temperature data product and converts it into Celsius.
    # Once the hexadecimal string is converted to decimal, only scaling (dividing by a factor and adding an offset) is
    # required to produce the correct decimal representation of the data in Celsius.
    # The scaling function differs by CTD make/model as described below.
    # SBE 37IM, Output Format 0
    # 1) Standard conversion from 5-character hex string (Thex) to decimal (tdec)
    # 2) Scaling: T [C] = (tdec / 10,000) - 10

    root_rdt = RecordDictionaryTool(param_dictionary=self.temp)
    #todo: use only flat dicts for now, may change later...
    #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    scaled_temperature = temperature
    for i in xrange(len(temperature)):
        scaled_temperature[i] = (temperature[i] / 10000.0) - 10

    root_rdt['temp'] = scaled_temperature
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #todo: use only flat dicts for now, may change later...
    #root_rdt['coordinates'] = coord_rdt
    #root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L1_temperature', param_dictionary=self.temp, record_dictionary=root_rdt)
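# Aside (an observation, not part of the original transform): 'scaled_temperature = temperature'
# binds a second name to the same numpy array, so the loop above scales the input in place.
# If that aliasing is unwanted, an equivalent vectorized form that allocates a new array is:
#
#     scaled_temperature = (temperature / 10000.0) - 10
#
# numpy broadcasting applies the SBE 37IM scaling to every element in one step.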
def execute(self, granule):
    """Processes incoming data!!!!
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    #todo: use only flat dicts for now, may change later...
    #rdt0 = rdt['coordinates']
    #rdt1 = rdt['data']

    temperature = get_safe(rdt, 'temp')
    conductivity = get_safe(rdt, 'cond')
    pressure = get_safe(rdt, 'pres')

    longitude = get_safe(rdt, 'lon')
    latitude = get_safe(rdt, 'lat')
    time = get_safe(rdt, 'time')
    height = get_safe(rdt, 'height')

    log.warn('Got conductivity: %s' % str(conductivity))
    log.warn('Got pressure: %s' % str(pressure))
    log.warn('Got temperature: %s' % str(temperature))

    salinity = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

    log.warn('Got salinity: %s' % str(salinity))

    root_rdt = RecordDictionaryTool(param_dictionary=self.sal)
    #todo: use only flat dicts for now, may change later...
    #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    root_rdt['salinity'] = salinity
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #root_rdt['coordinates'] = coord_rdt
    #root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L2_salinity', param_dictionary=self.sal, record_dictionary=root_rdt)
def _trigger_func(self, stream_id):
    sine_ampl = 2.0  # Amplitude in both directions
    samples = 60

    sine_curr_deg = 0  # varies from 0 - 360

    startTime = time.time()
    count = samples  # something other than zero

    while True:
        count = time.time() - startTime
        sine_curr_deg = (count % samples) * 360 / samples

        c = numpy.array([sine_ampl * math.sin(math.radians(sine_curr_deg))])
        t = numpy.array([sine_ampl * 2 * math.sin(math.radians(sine_curr_deg + 45))])
        p = numpy.array([sine_ampl * 4 * math.sin(math.radians(sine_curr_deg + 60))])

        lat = lon = numpy.array([0.0])
        tvar = numpy.array([time.time()])

        #ctd_packet = ctd_stream_packet(stream_id=stream_id,
        #    c=c, t=t, p=p, lat=lat, lon=lon, time=tvar)

        rdt = RecordDictionaryTool(taxonomy=tx)

        h = numpy.array([random.uniform(0.0, 360.0)])

        rdt['time'] = tvar
        rdt['lat'] = lat
        rdt['lon'] = lon
        rdt['height'] = h
        rdt['temp'] = t
        rdt['cond'] = c
        rdt['pres'] = p

        g = build_granule(data_producer_id=stream_id, taxonomy=tx, record_dictionary=rdt)

        log.info('SinusoidalCtdPublisher sending 1 record!')
        self.publisher.publish(g)
        time.sleep(1.0)
def _build_granule_settings(self, param_dictionary=None, field_name='', value=None, time=None, latitude=None, longitude=None, height=None):
    root_rdt = RecordDictionaryTool(param_dictionary=param_dictionary)
    #data_rdt = RecordDictionaryTool(taxonomy=taxonomy)

    root_rdt[field_name] = value

    #coor_rdt = RecordDictionaryTool(taxonomy=taxonomy)
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #todo: use only flat dicts for now, may change later...
    #root_rdt['coordinates'] = coor_rdt
    #root_rdt['data'] = data_rdt

    log.debug("ctd_L0_all:_build_granule_settings: logging published Record Dictionary:\n %s", str(root_rdt.pretty_print()))

    return build_granule(data_producer_id='ctd_L0', param_dictionary=param_dictionary, record_dictionary=root_rdt)
def _get_data(cls, config):
    """
    Retrieves a random sample array of length config['constraints']['array_len'] and yields it
    in granules of at most config['max_records'] records each
    @param config Dict of configuration parameters - must contain ['constraints']['array_len']
    """
    array_len = get_safe(config, 'constraints.array_len', 1)

    max_rec = get_safe(config, 'max_records', 1)
    dprod_id = get_safe(config, 'data_producer_id')
    #tx_yml = get_safe(config, 'taxonomy')
    #ttool = TaxyTool.load(tx_yml)
    pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

    arr = npr.random_sample(array_len)
    log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr))
    cnt = calculate_iteration_count(arr.size, max_rec)
    for x in xrange(cnt):
        rdt = RecordDictionaryTool(param_dictionary=pdict)
        d = arr[x*max_rec:(x+1)*max_rec]
        rdt['dummy'] = d
        g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
        yield g
def _get_data(cls, config):
    """
    A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers
    @param config Dict of configuration parameters - must contain ['constraints']['count']
    """
    cnt = get_safe(config, 'constraints.count', 1)

    max_rec = get_safe(config, 'max_records', 1)
    dprod_id = get_safe(config, 'data_producer_id')
    #tx_yml = get_safe(config, 'taxonomy')
    #ttool = TaxyTool.load(tx_yml)
    pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

    def fibGenerator():
        """
        A Fibonacci sequence generator
        """
        count = 0
        ret = []
        a, b = 1, 1
        while 1:
            count += 1
            ret.append(a)
            if count == max_rec:
                yield np.array(ret)
                ret = []
                count = 0
            a, b = b, a + b

    gen = fibGenerator()
    cnt = calculate_iteration_count(cnt, max_rec)
    for i in xrange(cnt):
        rdt = RecordDictionaryTool(param_dictionary=pdict)
        d = gen.next()
        rdt['data'] = d
        g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
        yield g
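# Hypothetical driver for the generator above (illustrative only; 'pdict_dump' and the
# handler class name are assumptions, standing in for a ParameterDictionary.dump() result
# and whichever DataHandler class hosts _get_data):
#
#     config = {
#         'constraints': {'count': 10},
#         'max_records': 4,
#         'data_producer_id': 'fib_test_producer',
#         'param_dictionary': pdict_dump,
#     }
#     for g in FibonacciDataHandler._get_data(config):
#         log.debug('Got Fibonacci granule: %s', g)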
def execute(self, granule):
    """Processes incoming data!!!!
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    #todo: use only flat dicts for now, may change later...
    #rdt0 = rdt['coordinates']
    #rdt1 = rdt['data']

    conductivity = get_safe(rdt, 'cond')  #psd.get_values('conductivity')
    longitude = get_safe(rdt, 'lon')  #psd.get_values('longitude')
    latitude = get_safe(rdt, 'lat')  #psd.get_values('latitude')
    time = get_safe(rdt, 'time')  #psd.get_values('time')
    height = get_safe(rdt, 'height')  #psd.get_values('height')

    log.warn('CTDL1ConductivityTransform: Got conductivity: %s' % str(conductivity))

    root_rdt = RecordDictionaryTool(param_dictionary=self.cond)
    #todo: use only flat dicts for now, may change later...
    #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    scaled_conductivity = conductivity
    for i in xrange(len(conductivity)):
        scaled_conductivity[i] = (conductivity[i] / 100000.0) - 0.5

    root_rdt['cond'] = scaled_conductivity
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #root_rdt['coordinates'] = coord_rdt
    #root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L1_conductivity', param_dictionary=self.cond, record_dictionary=root_rdt)
def test_build_granule_and_load_from_granule(self):
    pdict = ParameterDictionary()

    t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
    t_ctxt.reference_frame = AxisTypeEnum.TIME
    t_ctxt.uom = 'seconds since 01-01-1970'
    pdict.add_context(t_ctxt)

    lat_ctxt = ParameterContext('lat', param_type=QuantityType(value_encoding=np.dtype('float32')))
    lat_ctxt.reference_frame = AxisTypeEnum.LAT
    lat_ctxt.uom = 'degree_north'
    pdict.add_context(lat_ctxt)

    lon_ctxt = ParameterContext('lon', param_type=QuantityType(value_encoding=np.dtype('float32')))
    lon_ctxt.reference_frame = AxisTypeEnum.LON
    lon_ctxt.uom = 'degree_east'
    pdict.add_context(lon_ctxt)

    temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=np.dtype('float32')))
    temp_ctxt.uom = 'degree_Celsius'
    pdict.add_context(temp_ctxt)

    cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=np.dtype('float32')))
    cond_ctxt.uom = 'unknown'
    pdict.add_context(cond_ctxt)

    pres_ctxt = ParameterContext('pres', param_type=QuantityType(value_encoding=np.dtype('float32')))
    pres_ctxt.uom = 'unknown'
    pdict.add_context(pres_ctxt)

    rdt = RecordDictionaryTool(param_dictionary=pdict)

    #Create some arrays and fill them with random values
    temp_array = np.random.standard_normal(100)
    cond_array = np.random.standard_normal(100)
    pres_array = np.random.standard_normal(100)
    time_array = np.random.standard_normal(100)
    lat_array = np.random.standard_normal(100)
    lon_array = np.random.standard_normal(100)

    #Use the RecordDictionaryTool to add the values
    rdt['temp'] = temp_array
    rdt['conductivity'] = cond_array
    rdt['pres'] = pres_array
    rdt['time'] = time_array
    rdt['lat'] = lat_array
    rdt['lon'] = lon_array

    g = build_granule(data_producer_id='john', record_dictionary=rdt, param_dictionary=pdict)

    l_pd = ParameterDictionary.load(g.param_dictionary)
    #l_tx = TaxyTool.load_from_granule(g)
    l_rd = RecordDictionaryTool.load_from_granule(g)

    # Make sure we got back the same ParameterDictionary
    #self.assertEquals(l_pd, pdict)
    self.assertEquals(l_pd.ord_from_key('temp'), pdict.ord_from_key('temp'))
    self.assertEquals(l_pd.ord_from_key('conductivity'), pdict.ord_from_key('conductivity'))

    # Now test the record dictionary object
    self.assertEquals(l_rd._rd, rdt._rd)
    #self.assertEquals(l_rd._param_dict, rdt._param_dict)

    for k, v in l_rd.iteritems():
        self.assertIn(k, rdt)
        if isinstance(v, np.ndarray):
            self.assertTrue((v == rdt[k]).all())
        else:
            self.assertEquals(v._rd, rdt[k]._rd)
def _get_data(cls, config):
    """
    Retrieves the configured variables from the external dataset over the configured temporal slice
    and yields the data in granules of at most config['max_records'] records each
    @param config Dict of configuration parameters - must contain ['external_dataset_res'] and ['dataset_object']
    """
    ext_dset_res = get_safe(config, 'external_dataset_res', None)

    # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
    ds = get_safe(config, 'dataset_object')

    if ext_dset_res and ds:
        t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        var_lst = ext_dset_res.dataset_description.parameters['variables']

        t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1)))
        #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
        if isinstance(t_slice, str):
            t_slice = eval(t_slice)

        lon = ds.variables[x_vname][:]
        lat = ds.variables[y_vname][:]
        z = ds.variables[z_vname][:]

        t_arr = ds.variables[t_vname][t_slice]
        data_arrays = {}
        for varn in var_lst:
            data_arrays[varn] = ds.variables[varn][t_slice]

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
        #tx_yml = get_safe(config, 'taxonomy')
        #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
        pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

        cnt = calculate_iteration_count(t_arr.size, max_rec)
        for x in xrange(cnt):
            ta = t_arr[x*max_rec:(x+1)*max_rec]

            # Make a 'master' RecDict
            #rdt = RecordDictionaryTool(taxonomy=ttool)
            rdt = RecordDictionaryTool(param_dictionary=pdict)
            # Make a 'coordinate' RecDict
            #rdt_c = RecordDictionaryTool(taxonomy=ttool)
            #rdt_c = RecordDictionaryTool(param_dictionary=pdict)
            # Make a 'data' RecDict
            #rdt_d = RecordDictionaryTool(taxonomy=ttool)
            #rdt_d = RecordDictionaryTool(param_dictionary=pdict)

            # Assign values to the coordinate RecDict
            rdt[x_vname] = lon
            rdt[y_vname] = lat
            rdt[z_vname] = z

            # Assign values to the data RecDict
            rdt[t_vname] = ta
            for key, arr in data_arrays.iteritems():
                d = arr[x*max_rec:(x+1)*max_rec]
                rdt[key] = d

            # Add the coordinate and data RecDicts to the master RecDict
            #rdt['coords'] = rdt_c
            #rdt['data'] = rdt_d

            # Build and return a granule
            #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)  # CBM: ttool must be passed
            g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
            yield g

        ds.close()
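# A possible replacement for the 'eval' flagged as sketchy above (this helper is a
# sketch, not part of the original handler): parse a colon-delimited slice literal
# such as '0:10' or '5:100:2' without executing arbitrary code.
def parse_slice(text):
    parts = text.split(':')
    if len(parts) > 3:
        raise ValueError('Invalid slice literal: %r' % text)
    # Empty segments become None, matching the semantics of a bare ':' in a slice
    return slice(*[int(p) if p else None for p in parts])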
def build_packet(self, *args, **kwargs):
    """
    Build and return a granule of data.
    @param taxonomy the taxonomy of the granule
    @param data dictionary containing sample data
    @return granule suitable for publishing
    """
    taxonomy_str = kwargs.get('taxonomy')
    data = kwargs.get('data')
    data_producer_id = kwargs.get('data_producer_id')

    if not data_producer_id:
        raise PacketFactoryException("data_producer_id parameter missing")

    if not taxonomy_str:
        raise PacketFactoryException("taxonomy parameter missing")

    if not data:
        raise PacketFactoryException("data parameter missing")

    taxonomy = self._get_taxy_tool(taxonomy_str)

    # the nick_names in the taxonomy:
    nick_names = self._get_nick_names_from_taxonomy(taxonomy)

    #
    # TODO in general, how are groups (and the individual values
    # belonging to the groups) to be determined?
    #
    # in this version, expect 'data' and 'coordinates' to be included in
    # the taxonomy -- TODO the idea would be to be more general here?

    ##############################################################
    # NOTE for the moment, using the flat data record dict 'rdt'
    ##############################################################

    #if not 'data' in nick_names:
    #    raise PacketFactoryException("expected name 'data' in taxonomy")
    #if not 'coordinates' in nick_names:
    #    raise PacketFactoryException("expected name 'coordinates' in taxonomy")

    rdt = RecordDictionaryTool(taxonomy=taxonomy)
    #data_rdt = RecordDictionaryTool(taxonomy=taxonomy)
    #coordinates_rdt = RecordDictionaryTool(taxonomy=taxonomy)
    #
    #rdt['data'] = data_rdt
    #rdt['coordinates'] = coordinates_rdt

    #def is_coordinate(nick_name):
    #    # just an ad hoc check to determine which group the nick_names
    #    # belong to
    #    return nick_name in ['lat', 'lon', 'time', 'height']

    # now, assign the values to the corresponding record dicts:
    for name, value in data.iteritems():
        handle = -1
        log.info("packetfactory: name: %s" % str(name))
        if name in nick_names:
            handle = taxonomy.get_handle(name)
            log.info("packetfactory: handle: %s" % str(handle))
        else:
            handles = taxonomy.get_handles(name)
            log.info("packetfactory: handles: %s" % str(handles))
            if len(handles) == 1:
                handle = handles.pop()
            elif len(handles) > 1:
                # TODO proper handling of this case
                log.warn("Multiple handles found for '%s': %s" % (name, handles))

        if handle >= 0:
            # ok, the nick_name has been found, either directly as a
            # nick_name or via an alias; set value (using nick_name):
            nick_name = taxonomy.get_nick_name(handle)

            assert isinstance(value, list)
            val = numpy.array(value)

            # NOTE for the moment, using the flat data record dict 'rdt':
            rdt[nick_name] = val
            #if is_coordinate(nick_name):
            #    coordinates_rdt[nick_name] = val
            #else:
            #    data_rdt[nick_name] = val
        else:
            # name not found.
            # In the current tests this is happening with 'stream_id'
            log.warning("No handle found for '%s'" % name)

    log.debug("dictionary created: %s" % rdt.pretty_print())

    return build_granule(data_producer_id=data_producer_id, taxonomy=taxonomy, record_dictionary=rdt)
def render_graphs(self, graph_data):
    # init Matplotlib
    fig = Figure(figsize=(8, 4), dpi=200, frameon=True)
    ax = fig.add_subplot(111)
    canvas = FigureCanvas(fig)
    imgInMem = StringIO.StringIO()

    # If there's no data, wait
    # For the simple case of testing, let's plot all time variant variables one at a time
    xAxisVar = "time"
    xAxisFloatData = graph_data[xAxisVar]

    rdt = RecordDictionaryTool(taxonomy=tx)

    # Prepare the set of y axis variables that will be plotted. This needs to be smarter and passed as
    # config variable to the transform
    yAxisVars = []
    for varName, varData in graph_data.iteritems():
        if varName == "time" or varName == "height" or varName == "longitude" or varName == "latitude":
            continue
        yAxisVars.append(varName)

    idx = 0
    for varName in yAxisVars:
        yAxisFloatData = graph_data[varName]

        # Generate the plot
        ax.plot(xAxisFloatData, yAxisFloatData, self.line_style(idx), label=varName)
        idx += 1

    yAxisLabel = ""
    # generate a filename for the output image
    for varName in yAxisVars:
        if yAxisLabel:
            yAxisLabel = yAxisLabel + "-" + varName
        else:
            yAxisLabel = varName

    fileName = yAxisLabel + "_vs_" + xAxisVar + ".png"

    ax.set_xlabel(xAxisVar)
    ax.set_ylabel(yAxisLabel)
    ax.set_title(yAxisLabel + " vs " + xAxisVar)
    ax.set_autoscale_on(False)
    ax.legend(loc="upper left")

    # Save the figure to the in memory file
    canvas.print_figure(imgInMem, format="png")
    imgInMem.seek(0)

    # submit resulting image back using the out stream publisher
    msg = {"viz_product_type": "matplotlib_graphs",
           "image_obj": imgInMem.getvalue(),
           "image_name": fileName,
           "content_type": "image/png"}

    rdt["matplotlib_graphs"] = numpy.array([msg])

    # Generate a granule carrying the graph objects generated
    return build_granule(data_producer_id="matplotlib_graphs_transform", taxonomy=tx, record_dictionary=rdt)
def execute(self, granule):
    log.debug('(Google DT transform): Received Viz Data Packet')

    #init stuff
    varTuple = []
    dataDescription = []
    dataTableContent = []

    rdt = RecordDictionaryTool.load_from_granule(granule)

    vardict = {}
    vardict['time'] = get_safe(rdt, 'time')
    vardict['conductivity'] = get_safe(rdt, 'cond')
    vardict['pressure'] = get_safe(rdt, 'pres')
    vardict['temperature'] = get_safe(rdt, 'temp')
    vardict['longitude'] = get_safe(rdt, 'lon')
    vardict['latitude'] = get_safe(rdt, 'lat')
    vardict['height'] = get_safe(rdt, 'height')

    # Figure out how many values are present in the granule
    arrLen = len(vardict['time'])

    #init the dataTable
    # create data description from the variables in the message
    dataDescription = [('time', 'float', 'time')]

    # split the data string to extract variable names
    for varname in vardict.keys():  #psd.list_field_names():
        if varname == 'time':
            continue
        dataDescription.append((varname, 'number', varname))

    # Add the records to the datatable
    for i in xrange(arrLen):
        varTuple = []
        for varname, _, _ in dataDescription:
            if vardict[varname] is None or len(vardict[varname]) == 0:
                val = 0.0
            else:
                val = float(vardict[varname][i])
            varTuple.append(val)

        # Append the tuples to the data table
        if len(varTuple) > 0:
            dataTableContent.append(varTuple)

    # submit the partial datatable to the viz service
    out_rdt = RecordDictionaryTool(taxonomy=tx)

    # submit resulting table back using the out stream publisher. The data_product_id is the input dp_id
    # responsible for the incoming data
    msg = {"viz_product_type": "google_dt",
           "data_description": dataDescription,
           "data_content": dataTableContent}

    out_rdt['google_dt_components'] = numpy.array([msg])

    log.debug('Google DT transform: Sending a granule')
    out_granule = build_granule(data_producer_id='google_dt_transform', taxonomy=tx, record_dictionary=out_rdt)

    return out_granule