def _parse_header(self): """ Parse the start time of the profile and the sensor """ # read the first bytes from the file header = self._stream_handle.read(HEADER_BYTES) match = HEADER_MATCHER.match(header) # parse the header if match is not None: # use the profile start time as the timestamp fields = struct.unpack('>II', match.group(2)) timestamp = int(fields[1]) self._timestamp = float(ntplib.system_to_ntp_time(timestamp)) log.debug(self._start_data_particle_class) sample = self._extract_sample(self._start_data_particle_class, None, header, self._timestamp) if sample: # create particle self._increment_state(HEADER_BYTES) log.debug("Extracting header %s with read_state: %s", sample, self._read_state) self._saved_header = (sample, copy.copy(self._read_state)) else: raise SampleException("File header does not match header regex")
def add_timestamps(self, start_end_list):
    """
    Add timestamps to a list of (start, end) tuples that are normalized to
    coincide with the raw block list indices.

    @param start_end_list The list of (start, end) tuples such as:
        [(15, 20), (35, 37)]
    @retval The timestamps associated with these based on the values in the
        raw block list. For example, if the raw block list is
        [(0, 14, 123.456), (15, 20, 234.567), (21, 37, 345.784)], then the
        result will be [(15, 20, 234.567), (35, 37, 345.784)]
    @throws SampleException if a tuple is neither a pair nor a triple
    """
    result_list = []
    for item in start_end_list:
        # simple case if it already has a timestamp: keep it and move on.
        # (This previously 'break'-ed out of the loop, silently dropping
        # every tuple after the first already-stamped one.)
        if len(item) == 3:
            result_list.append(item)
            continue
        elif len(item) == 2:
            (s, e) = (item[0], item[1])
        else:
            raise SampleException("Invalid pair encountered!")

        # borrow the timestamp of the first raw block whose end offset
        # lies beyond this tuple's start offset
        for (raw_s, raw_e, raw_t) in self.raw_chunk_list:
            if s >= raw_e:
                continue
            else:
                result_list.append((s, e, raw_t))
                break

    log.trace("add_timestamp returning result_list: %s", result_list)
    return result_list
def __init__(self, raw_data, *args, **kwargs): super(AdcptMDspecInstrumentDataParticle, self).__init__(raw_data, *args, **kwargs) # construct the timestamp from the file time file_time = self.raw_data[FILE_TIME_POSITION] match = DATE_TIME_MATCHER.match(file_time) if match: timestamp = (int(match.group('year')) + 2000, int(match.group('month')), int(match.group('day')), int(match.group('hour')), int(match.group('minute')), 0.0, 0, 0, 0) elapsed_seconds = calendar.timegm(timestamp) self.set_internal_timestamp(unix_time=elapsed_seconds) else: # timestamp is essential for a data particle - no timestamp, bail out raise SampleException( "AdcptMDspecInstrumentDataParticle: Unable to construct " "internal timestamp from file time: %s" % file_time) self.instrument_particle_map = DSPEC_DATA_MAP
def calculate_timestamp(self, year_and_day_of_year, sample_time):
    """
    Calculate the timestamp.

    :param year_and_day_of_year: Integer year and day of year value
    :param sample_time: Sample time in floating point hours
    :return: The timestamp in ntp64, or None if the year and day of year
        cannot be extracted
    """
    # split the combined integer into its year and day-of-year parts
    year, day_of_year = get_year_and_day_of_year(year_and_day_of_year)

    if year is None or day_of_year is None:
        # need at least 5 digits to get year and day of year
        msg = 'Not enough digits for year and day of year: %s, unable to calculate timestamp' % \
            str(year_and_day_of_year)
        log.warning(msg)
        self._exception_callback(SampleException(msg))
        # return no timestamp so the particle is not calculated
        return None

    # break the floating point hours into whole hours, whole minutes,
    # whole seconds, and the left-over fractional seconds
    whole_hours = int(sample_time)
    whole_minutes = int(60.0 * (sample_time - float(whole_hours)))
    total_seconds = 3600.0 * (sample_time - float(whole_hours)) - float(whole_minutes) * 60.0
    whole_seconds = int(total_seconds)
    fractional_seconds = total_seconds - whole_seconds

    # datetime handles the whole units; the fractional seconds are added
    # back in after conversion to epoch seconds
    date = datetime.datetime(year, 1, 1) + \
        datetime.timedelta(days=day_of_year - 1, hours=whole_hours,
                           minutes=whole_minutes, seconds=whole_seconds)

    # seconds since Jan 1 1970, including the fractional part
    utc_timestamp = calendar.timegm(date.timetuple()) + fractional_seconds

    # convert to seconds since Jan 1 1900 for ntp
    return ntplib.system_to_ntp_time(utc_timestamp)
def _read_column_labels(self):
    """
    Read the next three lines to populate column data.

    Row 1 == labels
    Row 2 == units
    Row 3 == column byte size

    Currently we are only able to support 3 label line rows. If
    num_label_lines != 3 then raise an exception.

    @throws SampleException if the header does not declare exactly 3 label lines
    """
    if self._header_dict.get('num_label_lines') != 3:
        raise SampleException("Label line count must be 3 for this parser")

    def next_row():
        # consume one header row from the stream, split on whitespace
        return self._stream_handle.readline().strip().split()

    # rows arrive in a fixed order: labels, units, byte sizes
    self._header_dict['labels'] = next_row()
    self._header_dict['data_units'] = next_row()
    self._header_dict['num_of_bytes'] = map(int, next_row())

    log.debug("Label count: %d", len(self._header_dict['labels']))
def _build_parsed_values(self):
    """
    Take the clock data and parse it into values with appropriate tags.
    @throws SampleException If there is a problem with sample creation
    """
    try:
        # six single bytes of clock data followed by a 2-byte trailer
        packed = struct.unpack('<6B2s', self.raw_data)
    except Exception as e:
        log.error('Error creating particle clock data raw data: %r', self.raw_data)
        raise SampleException(e)

    # each clock byte is binary-coded decimal: render as hex, then read
    # the two hex digits back as a decimal integer
    minutes, seconds, day, hour, year, month = [int('%02x' % value) for value in packed[:6]]

    result = [{VID: NortekEngClockDataParticleKey.DATE_TIME_ARRAY,
               VAL: [minutes, seconds, day, hour, year, month]}]

    log.debug('NortekEngClockDataParticle: particle=%r', result)
    return result
def parse_file(self):
    """
    The main parsing function which loops over each line in the file and
    extracts particles if the correct format is found.
    """
    line = self._stream_handle.readline()

    while line:
        data_match = DATA_LINE_MATCHER.match(line)

        if data_match is not None:
            # a data line: turn it into a particle and buffer it
            self._record_buffer.append(
                self._extract_sample(self.particle_class, None, data_match, None))
        elif IGNORE_LINE_MATCHER.match(line) is None:
            # neither a data line nor a dcl logger line we deliberately
            # skip: report the unknown format through the callback
            error_message = 'Found line with unknown format %s' % line
            log.warn(error_message)
            self._exception_callback(SampleException(error_message))

        line = self._stream_handle.readline()
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker. If it is a valid data
    piece, build a particle, update the position and timestamp. Go until the
    chunker has no more valid data.

    Timestamp records in the stream set the running timestamp; data records
    are turned into particles stamped with that running timestamp, which is
    then advanced by one sample interval per particle.

    @retval a list of tuples with sample particles encountered in this
        parsing, plus the state. An empty list of nothing was parsed.
    @throws SampleException if a data record is seen before any timestamp
        record has established a reasonable timestamp
    """
    result_particles = []
    (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index()
    non_data = None

    # sieve looks for timestamp, update and increment position
    while (chunk != None):
        time_match = TIME_MATCHER.match(chunk)
        data_match = DATA_MATCHER.match(chunk)
        if time_match:
            log.trace("Encountered timestamp in data stream: %s", time_match.group(0))
            self._timestamp = self._convert_string_to_timestamp(time_match.group(0))
            self._increment_state(end, self._timestamp)

        elif data_match:
            # self._timestamp <= 1.0 means no timestamp record was parsed yet
            if self._timestamp <= 1.0:
                raise SampleException("No reasonable timestamp encountered at beginning of file!")
            # particle-ize the data block received, return the record
            sample = self._extract_sample(self._particle_class, DATA_MATCHER, chunk, self._timestamp)
            if sample:
                # create particle
                log.trace("Extracting sample chunk %s with read_state: %s", chunk, self._read_state)
                self._increment_state(end, self._timestamp)
                self._increment_timestamp()  # increment one samples worth of time
                result_particles.append((sample, copy.copy(self._read_state)))

        # Check for noise between records, but ignore newline. This is detecting noise following
        # the last successful chunk read which is why it is post sample generation.
        if non_data is not None and non_data != "\n":
            log.info("Gap in datafile detected.")
            log.trace("Noise detected: %s", non_data)
            self.start_new_sequence()
        if non_data is not None:
            self._increment_state(len(non_data), self._timestamp)

        (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index()
        (nd_timestamp, non_data) = self._chunker.get_next_non_data(clean=True)

    return result_particles
def generate_dict(self):
    """
    Generate a simple dictionary of sensor data and timestamps, without
    going to JSON. This is useful for the times when JSON is not needed to
    go across an interface. There are times when particles are used
    internally to a component/process/module/etc.

    @retval A python dictionary with the proper timestamps and data values
    @throws SampleException if the preferred timestamp is not present
    """
    # the preferred timestamp must exist before anything is serialized
    if not self._check_preferred_timestamps():
        raise SampleException("Preferred timestamp not in particle!")

    # build the parsed values first: doing so may update particle state
    # (e.g. quality flags) that the base structure then picks up
    values = self._build_parsed_values()

    result = self._build_base_structure()
    result[DataParticleKey.STREAM_NAME] = self.data_particle_type()
    result[DataParticleKey.VALUES] = values

    log.debug("Serialize result: %s", result)
    return result
def _build_parsed_values(self):
    """
    Parse the sign-on response, extracting the sample time as an NTP
    timestamp.
    @throws SampleException If there is a problem with sample creation
    """
    match = IRISStatusSignOnParticle.regex_compiled().match(self.raw_data)
    if not match:
        # an unmatched input previously fell through to an uncaught
        # AttributeError on match.group(); raise the conventional
        # exception instead, consistent with IRISDataParticle
        raise SampleException("No regex match of parsed sample data: [%s]" % self.raw_data)

    try:
        iris_time = match.group(1)
        timestamp = time.strptime(iris_time, "%Y/%m/%d %H:%M:%S")
        self.set_internal_timestamp(unix_time=time.mktime(timestamp))
        ntp_timestamp = ntplib.system_to_ntp_time(time.mktime(timestamp))
    except ValueError:
        raise SampleException("ValueError while converting data: [%s]" % self.raw_data)

    result = [
        {DataParticleKey.VALUE_ID: IRISSignOnParticleKey.TIME,
         DataParticleKey.VALUE: ntp_timestamp},
    ]

    return result
def _build_parsed_values(self):
    """
    Turn the raw data tuple into an array of dictionaries defining the data
    in the particle with the appropriate tags.

    raw_data is a tuple: element 0 holds the two timestamps as a packed
    binary string, element 1 the number of samples.
    @throws SampleException If there is a problem with sample creation
    """
    packed_times = self.raw_data[0]
    if len(packed_times) != TIME_RECORD_BYTES:
        raise SampleException(
            "DofstKWfpMetadataParticle: Received unexpected number of bytes %d"
            % len(packed_times))

    # two big-endian unsigned 32-bit integers: time on and time off
    time_on, time_off = struct.unpack('>II', packed_times)

    return [
        self._encode_value(WfpMetadataParserDataParticleKey.WFP_TIME_ON, time_on, int),
        self._encode_value(WfpMetadataParserDataParticleKey.WFP_TIME_OFF, time_off, int),
        self._encode_value(WfpMetadataParserDataParticleKey.WFP_NUMBER_SAMPLES,
                           self.raw_data[1], int),
    ]
def _build_parsed_values(self):
    """
    Parse a matched METBK sample into tagged values, one dictionary per
    measured quantity.
    @throws SampleException if the raw data does not match the sample regex
    """
    match = METBK_SampleDataParticle.regex_compiled().match(self.raw_data)
    if not match:
        # the message was previously passed as a second positional argument
        # to SampleException, so the %s placeholder was never interpolated;
        # format it explicitly
        raise SampleException("METBK_SampleDataParticle: No regex match of parsed sample data: [%s]"
                              % self.raw_data)

    result = [{DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.BAROMETRIC_PRESSURE,
               DataParticleKey.VALUE: float(match.group(1))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.RELATIVE_HUMIDITY,
               DataParticleKey.VALUE: float(match.group(2))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.AIR_TEMPERATURE,
               DataParticleKey.VALUE: float(match.group(3))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.LONGWAVE_IRRADIANCE,
               DataParticleKey.VALUE: float(match.group(4))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.PRECIPITATION,
               DataParticleKey.VALUE: float(match.group(5))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.SEA_SURFACE_TEMPERATURE,
               DataParticleKey.VALUE: float(match.group(6))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.SEA_SURFACE_CONDUCTIVITY,
               DataParticleKey.VALUE: float(match.group(7))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.SHORTWAVE_IRRADIANCE,
               DataParticleKey.VALUE: float(match.group(8))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.EASTWARD_WIND_VELOCITY,
               DataParticleKey.VALUE: float(match.group(9))},
              {DataParticleKey.VALUE_ID: METBK_SampleDataParticleKey.NORTHWARD_WIND_VELOCITY,
               DataParticleKey.VALUE: float(match.group(10))}]

    log.debug("METBK_SampleDataParticle._build_parsed_values: result=%s" % result)
    return result
def parse_file(self):
    """
    Entry point into parsing the file: iterate over each fixed-size binary
    record until the entire file is parsed.
    @throws SampleException if the file size is not a multiple of RECORD_SIZE
    """
    # determine the file size from the end offset of the stream
    self._stream_handle.seek(0, io.SEEK_END)
    file_size = self._stream_handle.tell()

    # every record is exactly RECORD_SIZE bytes; any other total length
    # means record boundaries cannot be identified
    if file_size % RECORD_SIZE:
        msg = "Binary file is not an even multiple of record size, records cannot be identified."
        log.error(msg)
        raise SampleException(msg)

    # rewind and consume the file one record at a time
    self._stream_handle.seek(0, io.SEEK_SET)

    record = self._stream_handle.read(RECORD_SIZE)
    while record:
        self._record_buffer.append(
            self._extract_sample(FdchpADataParticle, None, record))
        record = self._stream_handle.read(RECORD_SIZE)
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into a particle with the
    appropriate tag.
    @throws SampleException If there is a problem with sample creation
    """
    # the record must be long enough and start with the rate record ID byte
    if len(self.raw_data) < RATE_BYTES or self.raw_data[0] != RATE_ID:
        # the message was previously passed as a second positional argument
        # to SampleException, so the %s placeholder was never interpolated;
        # format it explicitly
        raise SampleException(
            "MopakODclRateParserDataParticle: Not enough bytes provided in [%s]"
            % self.raw_data)

    # skip the leading ID byte and the trailing 2 checksum bytes:
    # six big-endian floats followed by an unsigned int timer
    fields = struct.unpack('>ffffffI', self.raw_data[1:RATE_BYTES - 2])

    result = [
        self._encode_value(MopakODclRateParserDataParticleKey.MOPAK_ROLL, fields[0], float),
        self._encode_value(MopakODclRateParserDataParticleKey.MOPAK_PITCH, fields[1], float),
        self._encode_value(MopakODclRateParserDataParticleKey.MOPAK_YAW, fields[2], float),
        self._encode_value(MopakODclRateParserDataParticleKey.MOPAK_ANG_RATEX, fields[3], float),
        self._encode_value(MopakODclRateParserDataParticleKey.MOPAK_ANG_RATEY, fields[4], float),
        self._encode_value(MopakODclRateParserDataParticleKey.MOPAK_ANG_RATEZ, fields[5], float),
        self._encode_value(MopakODclRateParserDataParticleKey.MOPAK_TIMER, fields[6], int)
    ]

    # log message previously named the wrong class (MopakOStc...)
    log.trace('MopakODclRateParserDataParticle: particle=%s', result)
    return result
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into a particle with the
    appropriate tag.
    @throws SampleException If there is a problem with sample creation
    """
    # match the data inside the wrapper
    match = DATA_MATCHER.match(self.raw_data)
    if not match:
        # the message was previously passed as a second positional argument
        # to SampleException, so the %s placeholder was never interpolated
        # (and the string carried a stray line continuation); format it
        # explicitly
        raise SampleException(
            "RteODclParserDataParticle: No regex match of parsed sample data [%s]"
            % self.raw_data)

    result = [
        self._encode_value(RteODclParserDataParticleKey.RTE_TIME, match.group(1), str),
        self._encode_value(RteODclParserDataParticleKey.RTE_COULOMBS, match.group(3), float),
        self._encode_value(RteODclParserDataParticleKey.RTE_AVG_Q_CURRENT, match.group(5), float),
        self._encode_value(RteODclParserDataParticleKey.RTE_AVG_VOLTAGE, match.group(7), float),
        self._encode_value(RteODclParserDataParticleKey.RTE_AVG_SUPPLY_VOLTAGE, match.group(9), float),
        self._encode_value(RteODclParserDataParticleKey.RTE_HITS, match.group(11), int),
        self._encode_value(RteODclParserDataParticleKey.RTE_STATE, match.group(13), int)
    ]

    log.debug('RteODclParserDataParticle: particle=%s', result)
    return result
def parse_ct_record(self, ct_record, sio_header_timestamp):
    """
    This function parses a Telemetered CT record and
    returns a list of data particles.

    Parameters:
      ct_record - the input which is being parsed
      sio_header_timestamp - required for particle, passed through

    On unexpected data the remainder of the record is abandoned and the
    exception callback is invoked; particles generated up to that point
    are still returned.
    """
    particles = []
    last_index = len(ct_record)
    start_index = 0

    # walk the record in fixed-size sample strides
    while start_index < last_index:
        #
        # Look for a match in the next group of bytes
        #
        ct_match = TEL_CT_MATCHER.match(
            ct_record[start_index:start_index + TEL_CT_SAMPLE_BYTES])

        if ct_match is not None:
            #
            # Generate the data particle.
            # Data stored for each particle is a tuple of the following:
            #   SIO header timestamp (input parameter)
            #   inductive ID
            #   science data (temperature, conductivity, pressure)
            #   time of science data
            #
            sample = self._extract_sample(
                CtdmoGhqrSioTelemeteredInstrumentDataParticle, None,
                (sio_header_timestamp,
                 ct_match.group(TEL_CT_GROUP_ID),
                 ct_match.group(TEL_CT_GROUP_SCIENCE_DATA),
                 ct_match.group(TEL_CT_GROUP_TIME)))

            if sample is not None:
                #
                # Add this particle to the list of particles generated
                # so far for this chunk of input data.
                #
                particles.append(sample)

            start_index += TEL_CT_SAMPLE_BYTES

        #
        # If there wasn't a match, the input data is messed up.
        #
        else:
            log.error(
                'unknown data found in CT record %s at %d, leaving out the rest',
                binascii.b2a_hex(ct_record), start_index)
            self._exception_callback(
                SampleException(
                    'unknown data found in CT record at %d, leaving out the rest'
                    % start_index))
            break

    #
    # Once we reach the end of the input data,
    # return the number of particles generated and the list of particles.
    #
    return particles
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker. If it is a valid data
    piece, build a particle, update the position and timestamp. Go until the
    chunker has no more valid data.

    Only 'CS' SIO header blocks are turned into particles; 'PS' blocks are
    silently ignored; any other header ID is reported through the exception
    callback as unexpected data.

    @retval a list of tuples with sample particles encountered in this
        parsing, plus the state. An empty list of nothing was parsed.
    """
    result_particles = []

    (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
    (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)
    # if there is any non data handle it
    self.handle_non_data(non_data, non_end, start)

    while chunk is not None:
        # NOTE(review): assumes the sieve guarantees every chunk matches
        # the SIO header regex; a miss here would raise AttributeError --
        # confirm against the sieve implementation
        header_match = SIO_HEADER_MATCHER.match(chunk)

        if header_match.group(SIO_HEADER_GROUP_ID) == 'CS':
            data_match = ENG_MATCHER.match(chunk)
            if data_match:
                # put timestamp from hex string to float:
                posix_time = int(header_match.group(SIO_HEADER_GROUP_TIMESTAMP), 16)
                log.debug('utc timestamp %s', datetime.utcfromtimestamp(posix_time))
                timestamp = ntplib.system_to_ntp_time(float(posix_time))

                # particle-ize the data block received, return the record
                sample = self._extract_sample(self._particle_class, None, data_match,
                                              internal_timestamp=timestamp)
                if sample:
                    # create particle
                    result_particles.append(sample)
            else:
                log.warn('CS data does not match REGEX')
                self._exception_callback(SampleException('CS data does not match REGEX'))

        # 'PS' IDs will also be in this file but are specifically ignored
        elif header_match.group(SIO_HEADER_GROUP_ID) != 'PS':
            message = 'Unexpected Sio Header ID %s' % header_match.group(SIO_HEADER_GROUP_ID)
            log.warn(message)
            self._exception_callback(UnexpectedDataException(message))

        (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
        (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)
        # if there is any non data handle it
        self.handle_non_data(non_data, non_end, start)

    return result_particles
def _build_parsed_values(self):
    """
    Split an ADC sample into its eight measurements: Chanel 1 and Chanel 2
    High Input Impedance Electrodes, H2 Electrode, S2 Electrode, Type E
    Thermocouples 1 and 2, Reference Thermistor, and Board Thermistor.
    @throws SampleException If there is a problem with sample creation
    """
    match = THSPHParticle.regex_compiled().match(self.raw_data)
    if not match:
        raise SampleException("No regex match of THSPH parsed sample data: [%s]" % self.raw_data)

    # the eight regex groups map onto the particle keys in this order
    keys = (THSPHDataParticleKey.HIGH_IMPEDANCE_ELECTRODE_1,
            THSPHDataParticleKey.HIGH_IMPEDANCE_ELECTRODE_2,
            THSPHDataParticleKey.H2_ELECTRODE,
            THSPHDataParticleKey.S2_ELECTRODE,
            THSPHDataParticleKey.THERMOCOUPLE1,
            THSPHDataParticleKey.THERMOCOUPLE2,
            THSPHDataParticleKey.REFERENCE_THERMISTOR,
            THSPHDataParticleKey.BOARD_THERMISTOR)

    try:
        values = [self.hex2value(match.group(index)) for index in range(1, 9)]
    except ValueError:
        raise SampleException("ValueError while converting data: [%s]" % self.raw_data)

    return [{DataParticleKey.VALUE_ID: key, DataParticleKey.VALUE: value}
            for key, value in zip(keys, values)]
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into an array of
    dictionaries defining the data in the particle with the appropriate tag.
    @throws SampleException If there is a problem with sample creation
    """
    match = WFP_E_GLOBAL_RECOVERED_ENG_DATA_SAMPLE_MATCHER.match(self.raw_data)
    if not match:
        # the message was previously passed as a second positional argument
        # to SampleException, so the %s placeholder was never interpolated;
        # format it explicitly
        raise SampleException(
            "DostaLnWfpInstrumentParserDataParticle: No regex match of parsed sample data: [0x%s]"
            % binascii.b2a_hex(self.raw_data))

    try:
        # Let's first get the 32-bit unsigned int timestamp which should be in the first match group
        fields_prof = struct.unpack_from('>I', match.group(1))
        wfp_timestamp = fields_prof[0]

        # Now let's grab the global engineering data record match group
        # Should be 5 float 32-bit values followed by 3 unsigned int 16-bit values
        fields_prof = struct.unpack_from('>fffffHHH', match.group(2))
        # the optode_oxygen field should be the 4th value
        estimated_oxygen_concentration = fields_prof[3]
        # the optode_temperature field should be the 5th value
        optode_temperature = fields_prof[4]
    except (ValueError, TypeError, IndexError) as ex:
        raise SampleException(
            "Error (%s) while decoding parameters in data: [0x%s]"
            % (ex, binascii.b2a_hex(match.group(0))))

    result = [
        self._encode_value(
            DostaLnWfpInstrumentParserDataParticleKey.ESTIMATED_OXYGEN_CONCENTRATION,
            estimated_oxygen_concentration, float),
        self._encode_value(
            DostaLnWfpInstrumentParserDataParticleKey.OPTODE_TEMPERATURE,
            optode_temperature, float),
        self._encode_value(
            DostaLnWfpInstrumentParserDataParticleKey.WFP_TIMESTAMP,
            wfp_timestamp, int)
    ]

    return result
def unknown_data_exception(self, unknown_data):
    """
    Report data with an unknown format through the exception callback.
    :param unknown_data: The unknown data
    """
    message = 'Found %d bytes unknown format: 0x%s' % (
        len(unknown_data), binascii.hexlify(unknown_data))
    log.warning(message)
    self._exception_callback(SampleException(message))
def _build_parsed_values(self):
    """
    Take something in the sample format and split it into PAR values
    (with an appropriate tag).
    @throw SampleException If there is a problem with sample creation
    """
    match = SAMPLE_REGEX.match(self.raw_data)
    if not match:
        raise SampleException("No regex match of parsed sample data: [%s]"
                              % self.decoded_raw)

    log.trace("Matching sample [%s], [%s], [%s], [%s], [%s], [%s], [%s], [%s], [%s], [%s], [%s], [%s]",
              match.group(1), match.group(2), match.group(3), match.group(4), match.group(5),
              match.group(6), match.group(7), match.group(8), match.group(9), match.group(10),
              match.group(11), match.group(12))

    # the twelve regex groups map onto the particle keys in this order
    keys = (BarsDataParticleKey.RESISTIVITY_5,
            BarsDataParticleKey.RESISTIVITY_X1,
            BarsDataParticleKey.RESISTIVITY_X5,
            BarsDataParticleKey.HYDROGEN_5,
            BarsDataParticleKey.HYDROGEN_X1,
            BarsDataParticleKey.HYDROGEN_X5,
            BarsDataParticleKey.EH_SENSOR,
            BarsDataParticleKey.REFERENCE_TEMP_VOLTS,
            BarsDataParticleKey.REFERENCE_TEMP_DEG_C,
            BarsDataParticleKey.RESISTIVITY_TEMP_VOLTS,
            BarsDataParticleKey.RESISTIVITY_TEMP_DEG_C,
            BarsDataParticleKey.BATTERY_VOLTAGE)

    return [{DataParticleKey.VALUE_ID: key,
             DataParticleKey.VALUE: float(match.group(index))}
            for index, key in enumerate(keys, start=1)]
def _build_parsed_values(self):
    """
    Parse SAMI2-PCO2 measurement records from raw data into a dictionary

    SAMI Sample Record
    Regular SAMI (PCO2) data records produced by the instrument on either
    command or via an internal schedule. Like the control records, the
    messages are preceded by a '*' character and terminated with a '\r'.
    Sample string:

      *542705CEE91CC800400019096206800730074C2CE04274003B0018096106800732074E0D82066124

    A full description of the data record strings can be found in the
    vendor supplied SAMI Record Format document.

    @throws SampleException if the raw data does not match the sample regex
    """
    matched = PCO2W_SAMPLE_REGEX_MATCHER.match(self.raw_data)
    if not matched:
        raise SampleException(
            "No regex match of parsed sample data: [%s]" % self.decoded_raw)

    # regex groups 1..8 correspond one-to-one with these keys, in order
    particle_keys = [Pco2wSamiSampleDataParticleKey.UNIQUE_ID,
                     Pco2wSamiSampleDataParticleKey.RECORD_LENGTH,
                     Pco2wSamiSampleDataParticleKey.RECORD_TYPE,
                     Pco2wSamiSampleDataParticleKey.RECORD_TIME,
                     Pco2wSamiSampleDataParticleKey.LIGHT_MEASUREMENTS,
                     Pco2wSamiSampleDataParticleKey.VOLTAGE_BATTERY,
                     Pco2wSamiSampleDataParticleKey.THERMISTER_RAW,
                     Pco2wSamiSampleDataParticleKey.CHECKSUM]

    result = []
    grp_index = 1

    for key in particle_keys:
        if key in [Pco2wSamiSampleDataParticleKey.LIGHT_MEASUREMENTS]:
            # parse group 5 into 14, 2 byte (4 character) values stored in
            # an array.
            light = matched.group(grp_index)
            light = [int(light[i:i + 4], 16) for i in xrange(0, len(light), 4)]

            result.append({DataParticleKey.VALUE_ID: key,
                           DataParticleKey.VALUE: light})
        else:
            # every other field is a single hex-encoded integer
            result.append({DataParticleKey.VALUE_ID: key,
                           DataParticleKey.VALUE: int(matched.group(grp_index), 16)})

        grp_index += 1

    return result
def _build_parsed_values(self):
    """
    Take the velocity header data sample format and parse it into values
    with appropriate tags.

    A failed checksum does not abort parsing; it flags the particle with
    DataParticleValue.CHECKSUM_FAILED and continues.

    @throws SampleException If there is a problem with sample creation
    """
    log.debug('VectorVelocityHeaderDataParticle: raw data =%r', self.raw_data)

    try:
        # 4-byte sync/id, 6-byte timestamp, record count, 8 single bytes
        # (noise 1-3, a spare, correlation 1-3, a spare), 20 spare bytes,
        # trailing checksum word
        unpack_string = '<4s6sH8B20sH'
        sync, timestamp, number_of_records, noise1, noise2, noise3, _, correlation1, correlation2, correlation3, _,\
            _, cksum = struct.unpack(unpack_string, self.raw_data)

        if not validate_checksum('<20H', self.raw_data):
            log.warn("Failed checksum in %s from instrument (%r)", self._data_particle_type, self.raw_data)
            self.contents[DataParticleKey.QUALITY_FLAG] = DataParticleValue.CHECKSUM_FAILED

        timestamp = common.convert_time(timestamp)
        # internal timestamp is seconds since Jan 1 1900
        self.set_internal_timestamp((timestamp - datetime(1900, 1, 1)).total_seconds())
    except Exception as e:
        log.error('Error creating particle vel3d_cd_data_header, raw data: %r', self.raw_data)
        raise SampleException(e)

    result = [{VID: VectorVelocityHeaderDataParticleKey.TIMESTAMP, VAL: str(timestamp)},
              {VID: VectorVelocityHeaderDataParticleKey.NUMBER_OF_RECORDS, VAL: number_of_records},
              {VID: VectorVelocityHeaderDataParticleKey.NOISE1, VAL: noise1},
              {VID: VectorVelocityHeaderDataParticleKey.NOISE2, VAL: noise2},
              {VID: VectorVelocityHeaderDataParticleKey.NOISE3, VAL: noise3},
              {VID: VectorVelocityHeaderDataParticleKey.CORRELATION1, VAL: correlation1},
              {VID: VectorVelocityHeaderDataParticleKey.CORRELATION2, VAL: correlation2},
              {VID: VectorVelocityHeaderDataParticleKey.CORRELATION3, VAL: correlation3}]

    log.debug('VectorVelocityHeaderDataParticle: particle=%s', result)
    return result
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into an array of
    dictionaries defining the data in the particle with the appropriate tag.
    @throws SampleException If there is a problem with sample creation
    """
    # NOTE: since we are dropping the status messages in the sieve, only
    # samples should make it here
    if len(self.raw_data) != E_GLOBAL_SAMPLE_BYTES:
        # the original raise here referenced 'ex' and 'match' before either
        # was assigned, so a bad length surfaced as a NameError instead of
        # a meaningful SampleException; report the actual problem
        raise SampleException(
            "DostLnWfpSioMuleDataParticle: Received unexpected number of bytes %d"
            % len(self.raw_data))

    try:
        match = WFP_E_SAMPLE_MATCHER.match(self.raw_data)
        # grab the timestamp from the first match group
        fields_prof = struct.unpack('>I', match.group(1))
        wfp_timestamp = fields_prof[0]
        # and parse the rest of the data from the next match group:
        # 5 floats followed by 3 unsigned 16-bit ints
        fields_prof = struct.unpack('>f f f f f H H H', match.group(2))
        # the optode_oxygen field is the 4th value
        optode_oxygen = fields_prof[3]
        # the optode_temperature field is the 5th value
        optode_temperature = fields_prof[4]
    except (ValueError, TypeError, IndexError) as ex:
        raise SampleException(
            "Error (%s) while decoding parameters in data: [%s]"
            % (ex, match.group(0)))

    result = [
        self._encode_value(DostaLnWfpSioMuleDataParticleKey.OPTODE_OXYGEN,
                           optode_oxygen, float),
        self._encode_value(DostaLnWfpSioMuleDataParticleKey.OPTODE_TEMPERATURE,
                           optode_temperature, float),
        self._encode_value(DostaLnWfpSioMuleDataParticleKey.WFP_TIMESTAMP,
                           wfp_timestamp, int)
    ]

    log.debug('DostLnWfpSioMuleDataParticle: particle=%s', result)
    return result
def _build_parsed_values(self):
    """
    Take something in the sample format and split it into a OCR507 values
    (with an appropriate tag)
    @throws SampleException If there is a problem with sample creation
    """
    match = SAMPLE_REGEX.match(self.raw_data)
    if not match:
        raise SampleException(
            "No regex match of parsed sample data: [%r]" % self.raw_data)

    # Parse the relevant ascii fields
    instrument_id = match.group('instrument_id')
    serial_number = match.group('serial_number')
    timer = float(match.group('timer'))

    # Ensure the expected values were present
    # NOTE(review): 'if not timer' also rejects a legitimate 0.0 timer
    # value -- confirm whether zero should be treated as missing
    if not instrument_id:
        raise SampleException("No instrument id value parsed")
    if not serial_number:
        raise SampleException("No serial number value parsed")
    if not timer:
        raise SampleException("No timer value parsed")

    # Parse the relevant binary data
    """
    Field Name               Field Size (bytes)   Description          Format Char
    ----------               ------------------   -----------          -----------
    sample_delay             2                    BS formatted value   h
    ch[1-7]_sample           4                    BU formatted value   I
    regulated_input_voltage  2                    BU formatted value   H
    analog_rail_voltage      2                    BU formatted value   H
    internal_temp            2                    BU formatted value   H
    frame_counter            1                    BU formatted value   B
    checksum                 1                    BU formatted value   B
    """
    try:
        sample_delay, ch1_sample, ch2_sample, ch3_sample, ch4_sample, ch5_sample, ch6_sample, ch7_sample, \
            regulated_input_voltage, analog_rail_voltage, internal_temp, frame_counter, checksum \
            = struct.unpack('!h7IHHHBB', match.group('binary_data') + match.group('checksum'))
    except struct.error, e:
        raise SampleException(e)
    # NOTE(review): the visible portion of this method ends here without
    # assembling or returning a result list -- confirm the remainder of
    # the implementation was not lost in extraction
def _build_parsed_values(self):
    """
    Take something in the autosample/TS format and split it into the IRIS
    sample fields (with appropriate tags).
    @throws SampleException If there is a problem with sample creation
    """
    match = IRISDataParticle.regex_compiled().match(self.raw_data)
    if not match:
        raise SampleException(
            "No regex match of parsed sample data: [%s]" % self.raw_data)

    try:
        # the sample time drives both the internal and the reported
        # NTP timestamp
        iris_time = match.group(1)
        timestamp = time.strptime(iris_time, "%Y/%m/%d %H:%M:%S")
        self.set_internal_timestamp(unix_time=time.mktime(timestamp))
        ntp_timestamp = ntplib.system_to_ntp_time(time.mktime(timestamp))

        x_tilt = float(match.group(2))
        y_tilt = float(match.group(3))
        temperature = float(match.group(4))
        sn = str(match.group(5))
    except ValueError:
        raise SampleException("ValueError while converting data: [%s]" % self.raw_data)

    # pair each key with its converted value
    tagged = ((IRISDataParticleKey.TIME, ntp_timestamp),
              (IRISDataParticleKey.X_TILT, x_tilt),
              (IRISDataParticleKey.Y_TILT, y_tilt),
              (IRISDataParticleKey.TEMP, temperature),
              (IRISDataParticleKey.SN, sn))

    return [{DataParticleKey.VALUE_ID: key, DataParticleKey.VALUE: value}
            for key, value in tagged]
def sieve_function(self, raw_data):
    """
    Sort through the raw data to identify new blocks of data that need
    processing.  This is position-based instead of regex-based because
    record boundaries in this binary file are fixed-size spans.
    """
    found = []
    end_index = len(raw_data)

    # Walk the buffer from the end toward the beginning — the status
    # indicator field is variable length, so records can only be
    # delimited reliably in reverse.
    while end_index > 0:
        # Candidate start offsets for each of the three record layouts
        augmented_start = end_index - STATUS_BYTES_AUGMENTED
        normal_start = end_index - STATUS_BYTES
        eng_rec_start = end_index - WFP_E_GLOBAL_RECOVERED_ENG_DATA_SAMPLE_BYTES

        # Augmented status (with decimation factor) takes precedence
        if augmented_start >= 0 and \
                STATUS_START_MATCHER.match(raw_data[augmented_start:end_index]):
            log.trace("Found OffloadProfileData with decimation factor")
            end_index = augmented_start
        # Then a normal status record
        elif normal_start >= 0 and \
                STATUS_START_MATCHER.match(raw_data[normal_start:end_index]):
            log.trace("Found OffloadProfileData without decimation factor")
            end_index = normal_start
        # Otherwise this span is a global wfp_sio e recovered engineering
        # data record — record its (start, end) for processing
        elif eng_rec_start >= 0:
            log.trace("Found OffloadEngineeringData")
            found.append((eng_rec_start, end_index))
            end_index = eng_rec_start
        # None of the layouts fit: the file is malformed
        else:
            log.debug("raw_data_start_index_augmented %d", augmented_start)
            log.debug("raw_data_start_index_normal %d", normal_start)
            log.debug("global_recovered_eng_rec_index %d", eng_rec_start)
            log.debug("bad file or bad position?")
            raise SampleException(
                "File size is invalid or improper positioning")

    # Records were discovered back-to-front; return them in file order.
    return found[::-1]
def _build_parsed_values(self):
    """
    @throws SampleException If there is a problem with sample creation
    """
    if self._parameters is None or self._streams is None:
        self._load_streams()

    if self.raw_data not in self._streams:
        raise SampleException('Unknown stream %r' % self.raw_data)

    self._data_particle_type = self.raw_data
    stream_params = self._streams.get(self.raw_data, [])

    # Map each numeric value encoding to the pool of random values it
    # should draw from; strings are handled separately below.
    random_pools = {
        'int8': self.INT8_RANDOM,
        'int16': self.INT16_RANDOM,
        'int32': self.INT32_RANDOM,
        'int64': self.INT64_RANDOM,
        'uint8': self.UINT8_RANDOM,
        'uint16': self.UINT16_RANDOM,
        'uint32': self.UINT32_RANDOM,
        'uint64': self.UINT64_RANDOM,
        'float32': self.FLOAT_RANDOM,
        'float64': self.FLOAT_RANDOM,
    }

    values = []
    for param in stream_params:
        # skip explicitly ignored parameters and derived (function) params
        if param in self._ignore:
            continue
        p = self._parameters.get(param)
        if p.parameter_type == 'function':
            continue
        log.trace('Generating random data for param: %s name: %s',
                  param, p.name)

        if p.value_encoding in ('str', 'string'):
            val = self.random_string(20)
        elif p.value_encoding in random_pools:
            val = random.choice(random_pools[p.value_encoding])
        else:
            val = None
            log.debug('Unhandled parameter value encoding: %s', p)

        if val is not None:
            # array-typed non-string parameters are emitted as two repeats
            if 'array' in p.parameter_type and \
                    p.value_encoding not in ('str', 'string'):
                val = [val] * 2
            values.append({'value_id': p.name, 'value': val})

    return values
def read_footer(self):
    """
    Read the footer of the file including the end of profile marker
    (a record filled with \xFF), and the on and off timestamps for the
    profile.  Use these to calculate the time increment, which is needed
    to be able to calculate the timestamp for each data sample record.

    @throws SampleException if the number of samples is not an even
            integer, or if the end-of-profile marker cannot be found
    """
    # extra slack read beyond the nominal footer size, in case the
    # end-of-profile marker / timestamps are not flush with end of file
    pad_bytes = 10

    # seek backwards from end of file, give us extra 10 bytes padding in case
    # end of profile / timestamp is not right at the end of the file
    if self._filesize > (FOOTER_BYTES + pad_bytes):
        # whence=2: offset is relative to end of file
        self._stream_handle.seek(-(FOOTER_BYTES + pad_bytes), 2)
    else:
        # if this file is too short, use a smaller number of pad bytes
        # (may be <= 0 for very small files) and read from the start
        pad_bytes = self._filesize - FOOTER_BYTES
        self._stream_handle.seek(0)

    footer = self._stream_handle.read(FOOTER_BYTES + pad_bytes)

    # make sure we are at the end of the profile marker
    match = EOP_MATCHER.search(footer)
    if match:
        # group(2) holds two big-endian 4-byte fields: profile on/off times
        timefields = struct.unpack('>II', match.group(2))
        self._start_time = int(timefields[0])
        end_time = int(timefields[1])

        # bytes trailing the footer proper; excluded from the sample count
        extra_end_bytes = pad_bytes - match.start(1)

        # everything before the footer is fixed-size data records
        number_samples = float(self._filesize - FOOTER_BYTES - extra_end_bytes) / float(DATA_RECORD_BYTES)

        # per-sample time step across the whole profile
        if number_samples > 0:
            self._time_increment = float(end_time - self._start_time) / number_samples
        else:
            self._time_increment = 0.0

        # a fractional sample count means the file is corrupt / mis-sized
        if not number_samples.is_integer():
            raise SampleException(
                "File does not evenly fit into number of samples")

        # stash the raw timestamp bytes + count until metadata is emitted
        if not self._read_state[StateKey.METADATA_SENT]:
            self.footer_data = (match.group(2), number_samples)

        # reset the file handle to the beginning of the file
        self._stream_handle.seek(0)
    else:
        raise SampleException(
            "Unable to find end of profile and timestamps, this file is no good!"
        )
def _build_parsed_values(self):
    """
    Parse the OPTAA status display text into the particle value list:
    firmware version, firmware date, persistor serial number, BIOS
    version, and PicoDOS version.

    @throws SampleException if exactly three floating-point version
            numbers are not found, or if the persistor serial number is
            missing (it must convert to int, so absence is fatal)
    """
    data_stream = self.raw_data
    log.debug("OPTAA_StatusDataParticle: input = %s" % data_stream)

    ### This regex searching can be made a lot more specific, but at the expense of
    ### more code. For now, grabbing all three floating point numbers in one sweep is
    ### pretty efficient. Note, however, that if the manufacturer ever changes the
    ### format of the status display, this code may have to be re-written.
    float_regex_matcher = re.compile(r'\d+\.\d+')
    fp_results = float_regex_matcher.findall(data_stream)
    if len(fp_results) == 3:
        # order in the status display: firmware version, BIOS, PicoDOS
        version, bios, picodos = fp_results
    else:
        raise SampleException('Unable to find exactly three floating-point numbers in status message.')

    ### find the date/time string and remove enclosing parens
    date_regex_matcher = re.compile(r'\([A-Za-z]+\s+\d+\s+\d{4}\s+\d+:\d+:\d+\)')
    m = date_regex_matcher.search(data_stream)
    if m is not None:
        # strip the '(' and ')' surrounding the date text
        date_of_version = m.group()[1:-1]
    else:
        # date is emitted as a string, so a placeholder is acceptable here
        date_of_version = 'None found'

    # The serial number is emitted as an int below, so failing to find it
    # must raise here.  (Previously a missing SN fell through to
    # int('None found'), which crashed with an opaque ValueError.)
    persistor_regex_matcher = re.compile(r'Persistor CF2 SN:\d+')
    persistor = persistor_regex_matcher.search(data_stream)
    persistor_sn = None
    if persistor is not None:
        sn_digits = re.search(r'\d{2,10}', persistor.group())
        if sn_digits is not None:
            persistor_sn = sn_digits.group()
    if persistor_sn is None:
        raise SampleException('Unable to find Persistor CF serial number in status message.')

    result = [{DataParticleKey.VALUE_ID: OPTAA_StatusDataParticleKey.FIRMWARE_VERSION,
               DataParticleKey.VALUE: str(version)},
              {DataParticleKey.VALUE_ID: OPTAA_StatusDataParticleKey.FIRMWARE_DATE,
               DataParticleKey.VALUE: date_of_version},
              {DataParticleKey.VALUE_ID: OPTAA_StatusDataParticleKey.PERSISTOR_CF_SERIAL_NUMBER,
               DataParticleKey.VALUE: int(persistor_sn)},
              {DataParticleKey.VALUE_ID: OPTAA_StatusDataParticleKey.PERSISTOR_CF_BIOS_VERSION,
               DataParticleKey.VALUE: str(bios)},
              {DataParticleKey.VALUE_ID: OPTAA_StatusDataParticleKey.PERSISTOR_CF_PICODOS_VERSION,
               DataParticleKey.VALUE: str(picodos)}]

    log.debug("OPTAA_StatusDataParticle: result = %s" % result)
    return result