def _get_data(cls, config):
    """
    Iterable function that acquires data from a source iteratively based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters, may include constraints, formatters, etc
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    new_flst = get_safe(config, 'constraints.new_files', [])
    parser_mod = get_safe(config, 'parser_mod', '')
    parser_cls = get_safe(config, 'parser_cls', '')
    module = __import__(parser_mod, fromlist=[parser_cls])
    classobj = getattr(module, parser_cls)

    for f in new_flst:
        try:
            size = os.stat(f[0]).st_size
            try:
                # find the new data check index in config
                index = -1
                for ndc in config['set_new_data_check']:
                    if ndc[0] == f[0]:
                        index = config['set_new_data_check'].index(ndc)
                        break
            except:
                log.error('File name not found in attachment')

            parser = classobj(f[0], f[3])

            max_rec = get_safe(config, 'max_records', 1)
            stream_def = get_safe(config, 'stream_def')
            while True:
                particles = parser.get_records(max_count=max_rec)
                if not particles:
                    break

                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                populate_rdt(rdt, particles)
                g = rdt.to_granule()

                # TODO: record files already read for future additions...
                # update new data check with the latest file position
                if 'set_new_data_check' in config and index > -1:
                    # WRONG: should only record this after file finished parsing,
                    # but may not have another yield at that point to trigger update
                    config['set_new_data_check'][index] = (f[0], f[1], f[2], size)

                yield g
            # parser.close()
        except Exception as ex:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
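
# Illustrative consumption of the generator above: the docstring says the real consumer
# is BaseDataHandler._publish_data, which iterates and publishes each yielded Granule.
# The helper and 'publisher' object below are hypothetical assumptions, not that API.
def publish_all(publisher, data_generator):
    """Iterate a _get_data-style generator and publish every yielded Granule (sketch)."""
    count = 0
    for granule in data_generator:
        publisher.publish(granule)  # each iteration yields one well-formed Granule
        count += 1
    return count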
def test_tmpsf_arrays(self):
    self.preload_tmpsf()
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('tmpsf_sample', id_only=True)
    stream_def_id = self.create_stream_definition('tmpsf', parameter_dictionary_id=pdict_id)
    data_product_id = self.create_data_product('tmpsf', stream_def_id=stream_def_id)
    self.activate_data_product(data_product_id)
    rdt = ParameterHelper.rdt_for_data_product(data_product_id)

    tomato = {'quality_flag': 'ok',
              'preferred_timestamp': 'port_timestamp',
              'internal_timestamp': 3223662780.0,
              'stream_name': 'tmpsf_sample',
              'values': [{'value_id': 'timestamp', 'value': 3223662780.0},
                         {'value_id': 'temperature',
                          'value': [21.4548, 21.0132, 20.9255, 21.1266, 21.1341, 21.5606,
                                    21.2156, 21.4749, 21.3044, 21.132, 21.1798, 21.2352,
                                    21.3488, 21.1214, 21.6426, 21.1479, 21.0069, 21.5426,
                                    21.3204, 21.2402, 21.3968, 21.4371, 21.0411, 21.4361]},
                         {'value_id': 'battery_voltage', 'value': 11.5916},
                         {'value_id': 'serial_number', 'value': '021964'}],
              'port_timestamp': 1378230448.439269,
              'driver_timestamp': 3587219248.444593,
              'pkt_format_id': 'JSON_Data',
              'pkt_version': 1}

    from ion.agents.populate_rdt import populate_rdt
    rdt = populate_rdt(rdt, [tomato])

    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    breakpoint(locals())
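
# Hypothetical helper (not part of the source) showing how a named value can be pulled
# out of a driver particle like 'tomato' above; populate_rdt is assumed to perform an
# equivalent value_id-to-parameter mapping when it fills the record dictionary.
def particle_value(particle, value_id):
    """Return the value whose value_id matches, or None if absent."""
    for entry in particle['values']:
        if entry['value_id'] == value_id:
            return entry['value']
    return None

# e.g. particle_value(tomato, 'temperature') returns the 24-element temperature array
# and particle_value(tomato, 'battery_voltage') returns 11.5916.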
def create_test_granules(self, buffer_data=False):
    """
    Generate test granules from particles. If buffer_data is True, buffer particles
    into a shared granule; a particle with the new-sequence flag set starts a new
    granule. This method emulates the agent_stream_publisher module.
    :return: list of granules generated.
    """
    base_timestamp = 3583861263.0
    connection_index = 0

    particles = []
    particles.append(self.get_particle(base_timestamp, 10.5914, 161.06, 4.1870, 2693.0))
    particles.append(self.get_particle(base_timestamp + 1, 10.5915, 161.07, 4.1871, 2693.1))
    particles.append(self.get_particle(base_timestamp + 2, 10.5916, 161.08, 4.1872, 2693.2))
    particles.append(self.get_particle(base_timestamp + 3, 10.5917, 161.09, 4.1873, 2693.3, True))
    particles.append(self.get_particle(base_timestamp + 4, 10.5918, 161.10, 4.1874, 2693.4))

    data_groups = []
    result_granules = []
    data_groups_index = 0

    for particle in particles:
        # If we need a new connection then start a new group, but only if we have
        # found something in the current group
        if (particle.get('new_sequence', False) or buffer_data == False) and \
           (len(data_groups) > 0 and len(data_groups[data_groups_index]) > 0):
            data_groups_index += 1

        if len(data_groups) <= data_groups_index:
            data_groups.append([])

        data_groups[data_groups_index].append(particle)

    log.debug("Granules to create: %s", len(data_groups))

    for data in data_groups:
        connection_id = uuid.uuid4()
        connection_index += 1
        rdt = RecordDictionaryTool(param_dictionary=self.get_param_dict())
        rdt = populate_rdt(rdt, data)
        g = rdt.to_granule(data_producer_id='agent_res_id',
                           connection_id=connection_id.hex,
                           connection_index=str(connection_index))
        result_granules.append(g)

    return result_granules
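
# Expected grouping for the five particles above (a sketch of the intent, not an
# asserted result): only the fourth particle carries the new-sequence flag, so
# buffer_data=True should produce two granules (particles 1-3 and 4-5), while
# buffer_data=False starts a new group for every particle and produces five
# single-record granules.
#
#     buffered = self.create_test_granules(buffer_data=True)     # ~2 granules
#     unbuffered = self.create_test_granules(buffer_data=False)  # ~5 granules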
def create_granule(self, stream_name, param_dict_name, particle_list):
    pd_id = self.dataset_management.read_parameter_dictionary_by_name(param_dict_name, id_only=True)
    stream_def_id = self.pubsub_client.create_stream_definition(name=stream_name, parameter_dictionary_id=pd_id)
    stream_def = self.pubsub_client.read_stream_definition(stream_def_id)
    rdt = RecordDictionaryTool(stream_definition=stream_def)
    rdt = populate_rdt(rdt, particle_list)
    log.trace("RDT: %s", str(rdt))
    g = rdt.to_granule(data_producer_id='fake_agent_id')
    return g
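
# Hypothetical usage of the helper above, reusing the vel3d stream and parameter
# dictionary names that appear in the test further down; particle_list would be a
# list of driver particle dicts like the one in test_vel3d_particles.
#
#     g = self.create_granule(stream_name='vel3d_parsed',
#                             param_dict_name='vel3d_b_sample',
#                             particle_list=particle_list)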
def _publish_stream_buffer(self, stream_name):
    """
    ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw', 'internal_timestamp', 'time', 'lat', 'driver_timestamp']
    ['quality_flag', 'preferred_timestamp', 'temp', 'density', 'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp', 'time', 'lat', 'driver_timestamp', 'conductivit

    {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "raw", "values": [{"binary": true, "value": "MzIuMzkxOSw5MS4wOTUxMiwgNzg0Ljg1MywgICA2LjE5OTQsIDE1MDUuMTc5LCAxOSBEZWMgMjAxMiwgMDA6NTI6Mjc=", "value_id": "raw"}]}', 'time': 1355878347.744123}
    {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 32.3919, "value_id": "temp"}, {"value": 91.09512, "value_id": "conductivity"}, {"value": 784.853, "value_id": "pressure"}]}', 'time': 1355878347.744127}

    {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'port_timestamp': [None], 'lon': [None], 'raw': ['-4.9733,16.02390, 539.527, 34.2719, 1506.862, 19 Dec 2012, 01:03:07'], 'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117]}
    {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'temp': [-4.9733], 'density': [None], 'port_timestamp': [None], 'lon': [None], 'salinity': [None], 'pressure': [539.527], 'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117], 'conductivity': [16.0239]}
    """
    try:
        buf_len = len(self._stream_buffers[stream_name])
        if buf_len == 0:
            return

        stream_def = self._stream_defs[stream_name]
        if isinstance(stream_def, str):
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
        else:
            rdt = RecordDictionaryTool(stream_definition=stream_def)

        publisher = self._publishers[stream_name]

        vals = []
        for x in xrange(buf_len):
            vals.append(self._stream_buffers[stream_name].pop())

        rdt = populate_rdt(rdt, vals)
        log.info('Outgoing granule: %s',
                 ['%s: %s' % (k, v) for k, v in rdt.iteritems()])
        g = rdt.to_granule(data_producer_id=self._agent.resource_id,
                           connection_id=self._connection_ID.hex,
                           connection_index=str(self._connection_index[stream_name]))

        publisher.publish(g)
        log.info('Instrument agent %s published data granule on stream %s.',
                 self._agent._proc_name, stream_name)
        log.info('Connection id: %s, connection index: %i.',
                 self._connection_ID.hex, self._connection_index[stream_name])
        self._connection_index[stream_name] += 1

    except:
        log.exception('Instrument agent %s could not publish data on stream %s.',
                      self._agent._proc_name, stream_name)
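
# Minimal sketch of the per-stream bookkeeping that _publish_stream_buffer relies on.
# The names mirror the agent attributes used above (with a leading underscore on the
# agent itself); the initialization shown here is an illustrative assumption, not the
# agent's actual setup code.
import uuid

stream_name = 'parsed'
_stream_buffers = {stream_name: []}             # particles queued between publishes
_stream_defs = {stream_name: 'stream_def_id'}   # stream definition id (str) or object
_publishers = {stream_name: None}               # a stream publisher per stream
_connection_ID = uuid.uuid4()                   # regenerated on each driver connection
_connection_index = {stream_name: 1}            # incremented after every publish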
def _publish_stream_buffer(self, stream_name):
    """
    ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw', 'internal_timestamp', 'time', 'lat', 'driver_timestamp']
    ['quality_flag', 'preferred_timestamp', 'temp', 'density', 'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp', 'time', 'lat', 'driver_timestamp', 'conductivit

    {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "raw", "values": [{"binary": true, "value": "MzIuMzkxOSw5MS4wOTUxMiwgNzg0Ljg1MywgICA2LjE5OTQsIDE1MDUuMTc5LCAxOSBEZWMgMjAxMiwgMDA6NTI6Mjc=", "value_id": "raw"}]}', 'time': 1355878347.744123}
    {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 32.3919, "value_id": "temp"}, {"value": 91.09512, "value_id": "conductivity"}, {"value": 784.853, "value_id": "pressure"}]}', 'time': 1355878347.744127}

    {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'port_timestamp': [None], 'lon': [None], 'raw': ['-4.9733,16.02390, 539.527, 34.2719, 1506.862, 19 Dec 2012, 01:03:07'], 'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117]}
    {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'temp': [-4.9733], 'density': [None], 'port_timestamp': [None], 'lon': [None], 'salinity': [None], 'pressure': [539.527], 'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117], 'conductivity': [16.0239]}
    """
    try:
        buf_len = len(self._stream_buffers[stream_name])
        if buf_len == 0:
            return

        stream_def = self._stream_defs[stream_name]
        if isinstance(stream_def, str):
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
        else:
            rdt = RecordDictionaryTool(stream_definition=stream_def)

        publisher = self._publishers[stream_name]

        vals = []
        for x in xrange(buf_len):
            vals.append(self._stream_buffers[stream_name].pop())

        rdt = populate_rdt(rdt, vals)
        #log.info('Outgoing granule: %s',
        #         ['%s: %s' % (k, v) for k, v in rdt.iteritems()])
        #log.info('Outgoing granule preferred timestamp: %s' % rdt['preferred_timestamp'][0])
        #log.info('Outgoing granule destined for stream: %s', stream_name)
        g = rdt.to_granule(data_producer_id=self._agent.resource_id,
                           connection_id=self._connection_ID.hex,
                           connection_index=str(self._connection_index[stream_name]))

        publisher.publish(g)
        #log.info('Instrument agent %s published data granule on stream %s.',
        #         self._agent._proc_name, stream_name)
        #log.info('Connection id: %s, connection index: %i.',
        #         self._connection_ID.hex, self._connection_index[stream_name])
        self._connection_index[stream_name] += 1

    except:
        log.exception('Instrument agent %s could not publish data on stream %s.',
                      self._agent._proc_name, stream_name)
def test_vel3d_particles(self):
    """
    test_particles
    """
    stream_name = 'vel3d_parsed'
    param_dict_name = 'vel3d_b_sample'

    particle_list = [
        {
            "driver_timestamp": 3579022766.361967,
            "internal_timestamp": 3579047922.0,
            "pkt_format_id": "JSON_Data",
            "pkt_version": 1,
            "port_timestamp": 3579022762.357902,
            "preferred_timestamp": "port_timestamp",
            "quality_flag": "ok",
            "stream_name": "vel3d_b_sample",
            "values": [
                {"value": 3579047922.0, "value_id": "date_time_string"},
                {"value": 5, "value_id": "fractional_second"},
                {"value": "8000", "value_id": "velocity_beam_a"},
                {"value": "8000", "value_id": "velocity_beam_b"},
                {"value": "8000", "value_id": "velocity_beam_c"},
                {"value": "8000", "value_id": "velocity_beam_d"},
                {"value": 999.0, "value_id": "turbulent_velocity_east"},
                {"value": 999.0, "value_id": "turbulent_velocity_north"},
                {"value": 999.0, "value_id": "turbulent_velocity_up"},
                {"value": 2.16, "value_id": "temperature"},
                {"value": 1.0, "value_id": "mag_comp_x"},
                {"value": -0.0, "value_id": "mag_comp_y"},
                {"value": -7.9, "value_id": "pitch"},
                {"value": -78.2, "value_id": "roll"}
            ]
        }
    ]

    class RDT(dict):
        def __init__(self):
            super(RDT, self).__init__()
            self.temporal_parameter = None

    rdt = RDT()
    for x in particle_list[0]['values']:
        rdt[x['value_id']] = None

    rdt = populate_rdt(rdt, particle_list)
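
# Design note (an inference from the test above, not documented API): populate_rdt only
# needs mapping-style item access plus a 'temporal_parameter' attribute on its first
# argument, so a bare dict subclass is enough to exercise the value_id mapping without
# constructing a real RecordDictionaryTool or any container infrastructure.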
def _publish_stream_buffer(self, stream_name):
    """
    ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw', 'internal_timestamp', 'time', 'lat', 'driver_timestamp']
    ['quality_flag', 'preferred_timestamp', 'temp', 'density', 'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp', 'time', 'lat', 'driver_timestamp', 'conductivit

    {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "raw", "values": [{"binary": true, "value": "MzIuMzkxOSw5MS4wOTUxMiwgNzg0Ljg1MywgICA2LjE5OTQsIDE1MDUuMTc5LCAxOSBEZWMgMjAxMiwgMDA6NTI6Mjc=", "value_id": "raw"}]}', 'time': 1355878347.744123}
    {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 32.3919, "value_id": "temp"}, {"value": 91.09512, "value_id": "conductivity"}, {"value": 784.853, "value_id": "pressure"}]}', 'time': 1355878347.744127}

    {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'port_timestamp': [None], 'lon': [None], 'raw': ['-4.9733,16.02390, 539.527, 34.2719, 1506.862, 19 Dec 2012, 01:03:07'], 'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117]}
    {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'temp': [-4.9733], 'density': [None], 'port_timestamp': [None], 'lon': [None], 'salinity': [None], 'pressure': [539.527], 'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117], 'conductivity': [16.0239]}
    """
    try:
        ### Flush the agent state to the object store. This was added for the dataset agent
        ### publishers, which store their driver state in the object store. We had talked
        ### about flushing the state after publication by grabbing the current state here,
        ### doing our work, and then saving that state. However, flush_state doesn't accept
        ### parameters, so that seems more complex than simply flushing here. There is a
        ### slight downside: if publishing fails, the state will be slightly out of sync.
        if self._flush_on_publish:
            log.debug("ASP Flush Agent State")
            self._agent._flush_state()

        buf_len = len(self._stream_buffers[stream_name])
        if buf_len == 0:
            return

        stream_def = self._stream_defs[stream_name]
        if isinstance(stream_def, str):
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
        else:
            rdt = RecordDictionaryTool(stream_definition=stream_def)

        publisher = self._publishers[stream_name]

        vals = []
        for x in xrange(buf_len):
            vals.append(self._stream_buffers[stream_name].pop())

        rdt = populate_rdt(rdt, vals)
        #log.info('Outgoing granule: %s',
        #         ['%s: %s' % (k, v) for k, v in rdt.iteritems()])
        #log.info('Outgoing granule preferred timestamp: %s' % rdt['preferred_timestamp'][0])
        #log.info('Outgoing granule destined for stream: %s', stream_name)
        g = rdt.to_granule(data_producer_id=self._agent.resource_id,
                           connection_id=self._connection_ID.hex,
                           connection_index=str(self._connection_index[stream_name]))

        publisher.publish(g)
        #log.info('Instrument agent %s published data granule on stream %s.',
        #         self._agent._proc_name, stream_name)
        #log.info('Connection id: %s, connection index: %i.',
        #         self._connection_ID.hex, self._connection_index[stream_name])
        self._connection_index[stream_name] += 1

    except:
        log.exception('Instrument agent %s could not publish data on stream %s.',
                      self._agent._proc_name, stream_name)