def _process_end_of_file(self):
    """
    Confirm that the chunker does not have any extra bytes left at the end of the file
    """
    (nd_timestamp, non_data) = self._chunker.get_next_non_data()
    (timestamp, chunk) = self._chunker.get_next_data()

    if non_data and len(non_data) > 0:
        log.warn("Have extra unexplained non-data bytes at the end of the file:%s", non_data)
        raise UnexpectedDataException("Have extra unexplained non-data bytes at the end of the file:%s" % non_data)

    elif chunk and len(chunk) > 0:
        log.warn("Have extra unexplained data chunk bytes at the end of the file:%s", chunk)
        raise UnexpectedDataException("Have extra unexplained data chunk bytes at the end of the file:%s" % chunk)
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines
    """
    for line in self._stream_handle:

        # If this is a valid sensor data record,
        # use the extracted fields to generate a particle.
        sensor_match = SENSOR_DATA_MATCHER.match(line)
        if sensor_match is not None:
            particle = self._extract_sample(self._particle_class,
                                            None,
                                            sensor_match.groups(),
                                            None)
            self._record_buffer.append(particle)

        # It's not a sensor data record, see if it's a metadata record.
        else:
            # If it's a valid metadata record, ignore it.
            # Otherwise generate a warning for unknown data.
            meta_match = METADATA_MATCHER.match(line)
            if meta_match is None:
                error_message = 'Unknown data found in line %s' % line
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))
def find_record_start(self):
    """
    Find the start of the next record by looking for the sync marker
    :return: True if the end of the file was found, False if it was not
    """
    end_of_file = False
    read_buffer = ''

    # read one byte at a time until the sync marker is found
    one_byte = self._file_handle.read(1)
    while one_byte != vel3d_velpt_common.SYNC_MARKER:
        # store anything we find before the sync marker in the read buffer
        read_buffer += one_byte
        one_byte = self._file_handle.read(1)
        if one_byte == '':
            # no more bytes to read, break out of this loop
            end_of_file = True
            break

    if len(read_buffer) > 1 and not DATE_TIME_MATCHER.match(read_buffer):
        # we expect a version of the file to have ASCII date-time strings prior to
        # each record; if this is anything else, report it via the exception callback
        msg = 'Found unexpected data 0x%s' % binascii.hexlify(read_buffer)
        log.warning(msg)
        self._exception_callback(UnexpectedDataException(msg))

    return end_of_file
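# A minimal, self-contained sketch of the byte-at-a-time sync scan used above,
# with a made-up one-byte sync marker standing in for the real
# vel3d_velpt_common.SYNC_MARKER. It buffers everything seen before the marker
# so the caller can decide whether those skipped bytes were expected.
import io

SYNC = b'\xa5'  # hypothetical sync marker

def scan_to_sync(handle):
    skipped = b''
    byte = handle.read(1)
    while byte != SYNC:
        skipped += byte
        byte = handle.read(1)
        if byte == b'':
            # no more bytes to read: end of file before the marker
            return skipped, True
    return skipped, False

# usage: b'junk' is skipped, the marker is consumed, eof is False
skipped, eof = scan_to_sync(io.BytesIO(b'junk\xa5payload'))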
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing them to the
    established patterns, generating particles for sensor data lines.
    """
    for line in self.stream_handle:

        # If this is a valid sensor data record,
        # use the extracted fields to generate a particle.
        sensor_match = SENSOR_DATA_MATCHER.match(line)
        if sensor_match is not None:
            particle = self._extract_sample(CtdbpCdefCpInstrumentDataParticle,
                                            None,
                                            line,
                                            None)
            if particle is not None:
                self._record_buffer.append(particle)

        # It's not a sensor data record, see if it's a header record.
        else:
            # If it's a valid header record, ignore it.
            # Otherwise generate a warning for unknown data.
            header_match = HEADER_MATCHER.match(line)
            log.debug("Header match: %s", str(header_match))
            if header_match is None:
                error_message = 'Unknown data found in line %s' % line
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker. If it is a valid data
    piece, build a particle, update the position and timestamp. Go until the
    chunker has no more valid data.
    @retval a list of tuples with sample particles encountered in this
    parsing, plus the state. An empty list if nothing was parsed.
    """
    result_particles = []

    (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
    (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)

    # if there is any non-data, handle it
    self.handle_non_data(non_data, non_end, start)

    while chunk is not None:
        header_match = SIO_HEADER_MATCHER.match(chunk)

        if header_match.group(SIO_HEADER_GROUP_ID) == 'CS':
            data_match = ENG_MATCHER.match(chunk)
            if data_match:
                # convert the timestamp from a hex string to a float
                posix_time = int(header_match.group(SIO_HEADER_GROUP_TIMESTAMP), 16)
                log.debug('utc timestamp %s', datetime.utcfromtimestamp(posix_time))
                timestamp = ntplib.system_to_ntp_time(float(posix_time))

                # particle-ize the data block received, return the record
                sample = self._extract_sample(self._particle_class, None, data_match,
                                              internal_timestamp=timestamp)
                if sample:
                    # create particle
                    result_particles.append(sample)
            else:
                log.warn('CS data does not match REGEX')
                self._exception_callback(SampleException('CS data does not match REGEX'))

        # 'PS' IDs will also be in this file but are specifically ignored
        elif header_match.group(SIO_HEADER_GROUP_ID) != 'PS':
            message = 'Unexpected Sio Header ID %s' % header_match.group(SIO_HEADER_GROUP_ID)
            log.warn(message)
            self._exception_callback(UnexpectedDataException(message))

        (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
        (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)

        # if there is any non-data, handle it
        self.handle_non_data(non_data, non_end, start)

    return result_particles
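# A small worked example of the SIO-header timestamp conversion above, under
# the assumption that the header carries seconds since the Unix epoch encoded
# as an ASCII hex string. ntplib.system_to_ntp_time() simply adds the offset
# between the 1900 NTP epoch and the 1970 Unix epoch.
import ntplib
from datetime import datetime

hex_timestamp = '51EC763C'                    # hypothetical header field
posix_time = int(hex_timestamp, 16)           # 1374451260
ntp_time = ntplib.system_to_ntp_time(float(posix_time))
print(datetime.utcfromtimestamp(posix_time))  # 2013-07-22 00:01:00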
def parse_file(self):
    """
    Parse through the file, generating particles for lines that match the
    combined FLORT/CTDBP data pattern.
    """
    for line in self._stream_handle:

        # If this is a valid sensor data record,
        # use the extracted fields to generate a particle.
        # Check for a match from Endurance with combined FLORT/CTDBP.
        match = CTD_FLORT_DATA_MATCHER.match(line)
        if match is not None:
            particle = self._extract_sample(FlortDjInstrumentDataParticle,
                                            None,
                                            match)
            if particle is not None:
                self._record_buffer.append(particle)

        # It's not a sensor data record, see if it's a header record.
        else:
            # If it's a valid header record, ignore it.
            # Otherwise generate a warning for unknown data.
            header_match = HEADER_MATCHER.match(line)
            log.debug('Header match: %s', str(header_match))
            if header_match is None:
                error_message = 'Unknown data found in line %s' % line
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines
    """
    for line in self._stream_handle:

        # check for a match against the sensor data pattern
        match = SENSOR_DATA_MATCHER.match(line)

        if match is not None:
            log.debug('record found')

            # DCL controller timestamp is the port_timestamp
            port_timestamp = dcl_time_to_ntp(match.groups()[SENSOR_GROUP_TIMESTAMP])

            data_particle = self._extract_sample(self._particle_class,
                                                 None,
                                                 match.groups(),
                                                 port_timestamp=port_timestamp,
                                                 preferred_ts=DataParticleKey.PORT_TIMESTAMP)

            self._record_buffer.append(data_particle)

        else:
            # check to see if this is any other expected format
            test_meta = METADATA_MATCHER.match(line)

            if test_meta is None or line.find(TAB) != -1:
                # something in the data didn't match a required regex,
                # so raise an exception and press on
                message = "Error while decoding parameters in data: [%s]" % line
                self._exception_callback(UnexpectedDataException(message))
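# A hedged sketch of what a dcl_time_to_ntp() helper does, assuming the usual
# DCL controller timestamp format 'YYYY/MM/DD HH:MM:SS.mmm'; the real helper
# lives elsewhere in this code base and may differ in detail.
import calendar
import ntplib
from datetime import datetime

def dcl_time_to_ntp_sketch(timestamp_str):
    dt = datetime.strptime(timestamp_str, '%Y/%m/%d %H:%M:%S.%f')
    posix = calendar.timegm(dt.timetuple()) + dt.microsecond / 1e6
    return ntplib.system_to_ntp_time(posix)

# usage
ntp_time = dcl_time_to_ntp_sketch('2014/08/17 00:57:10.648')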
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker.
    If it is valid data, build a particle.
    Go until the chunker has no more valid data.
    @retval a list of tuples with sample particles encountered in this parsing.
    """
    result_particles = []

    (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
    (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)
    self.handle_non_data(non_data, non_end, start)

    while chunk is not None:

        # If this is a valid sensor data record,
        # use the extracted fields to generate data particles.
        sensor_match = SENSOR_DATA_MATCHER.match(chunk)
        if sensor_match is not None:

            # First extract the ctdbp_cdef_ce_instrument_recovered particle
            data_particle = self._extract_sample(CtdbpCdefCeInstrumentDataParticle,
                                                 None,
                                                 chunk,
                                                 None)
            if data_particle is not None:
                result_particles.append((data_particle, None))

            # Then extract the ctdbp_cdef_ce_dosta_recovered particle
            dosta_particle = self._extract_sample(CtdbpCdefCeDostaDataParticle,
                                                  None,
                                                  chunk,
                                                  None)
            if dosta_particle is not None:
                result_particles.append((dosta_particle, None))

        # It's not a sensor data record, see if it's a header record.
        else:
            # If it's a valid header record, ignore it.
            # Otherwise generate a warning for unknown data.
            header_match = HEADER_MATCHER.match(chunk)
            log.debug("Header match: %s", str(header_match))
            if header_match is None:
                error_message = 'Unknown data found in chunk %s' % chunk
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))

        (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
        (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)
        self.handle_non_data(non_data, non_end, start)

    return result_particles
def parse_file(self):
    """
    Entry point into parsing the file.
    Loop through the file one ensemble at a time.
    """
    position = 0  # set position to beginning of file
    header_id_bytes = self._stream_handle.read(2)  # read the first two bytes of the file

    while header_id_bytes:  # read returns an empty string at EOF

        if header_id_bytes == ADCPS_PD0_HEADER_REGEX:

            # get the ensemble size from the next 2 bytes (excludes checksum bytes)
            num_bytes = struct.unpack("<H", self._stream_handle.read(2))[0]

            self._stream_handle.seek(position)  # reset to beginning of ensemble
            input_buffer = self._stream_handle.read(num_bytes + 2)  # read entire ensemble

            if len(input_buffer) == num_bytes + 2:  # make sure there are enough bytes including checksum

                try:
                    pd0 = AdcpPd0Record(input_buffer, glider=self._glider)

                    velocity = self._particle_classes['velocity'](pd0)
                    self._record_buffer.append(velocity)

                    config = self._particle_classes['config'](pd0)
                    engineering = self._particle_classes['engineering'](pd0)

                    for particle in [config, engineering]:
                        if self._changed(particle):
                            self._record_buffer.append(particle)

                    if hasattr(pd0, 'bottom_track'):
                        bt = self._particle_classes['bottom_track'](pd0)
                        bt_config = self._particle_classes['bottom_track_config'](pd0)
                        self._record_buffer.append(bt)
                        if self._changed(bt_config):
                            self._record_buffer.append(bt_config)

                except PD0ParsingException:
                    # seek to just past this header match
                    # self._stream_handle.seek(position + 2)
                    self._exception_callback(RecoverableSampleException("Exception parsing PD0"))

            else:  # reached EOF
                log.warn("not enough bytes left for complete ensemble")
                self._exception_callback(UnexpectedDataException("Found incomplete ensemble at end of file"))

        else:  # did not get header ID bytes
            log.warn('did not find header ID bytes')
            self._exception_callback(RecoverableSampleException(
                "Did not find Header ID bytes where expected, trying next 2 bytes"))

        position = self._stream_handle.tell()  # set the new file position
        header_id_bytes = self._stream_handle.read(2)  # read the next two bytes of the file
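# A small illustration of the ensemble-size read above. PD0 ensembles begin
# with the two header ID bytes 0x7f 0x7f, followed by a little-endian uint16
# holding the ensemble size excluding the 2-byte checksum, which is why the
# code reads num_bytes + 2. The header below is made up for the example.
import struct

header = b'\x7f\x7f\x34\x02'                 # hypothetical ensemble start
num_bytes = struct.unpack('<H', header[2:4])[0]
print(num_bytes)                             # 564 bytes, checksum excluded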
def handle_non_data(self, non_data, non_end, start):
    """
    Handle any non-data that is found in the file
    """
    # Handle non-data here by calling the exception callback.
    if non_data is not None and non_end <= start:
        self._exception_callback(UnexpectedDataException(
            "Found %d bytes of un-expected non-data %s" % (len(non_data), non_data)))
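# A note on the non_end <= start guard used by these handle_non_data methods:
# the chunker can also report non-data that sits after the current data chunk,
# and those bytes may still resolve into valid data as more input arrives.
# Only non-data that ends at or before the start of the current chunk is known
# to be unexplainable, so only that case triggers the exception callback.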
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker.
    If it is valid data, build a particle.
    Go until the chunker has no more valid data.
    @retval a list of tuples with sample particles encountered in this
    parsing, plus the state.
    """
    result_particles = []

    nd_timestamp, non_data, non_start, non_end = self._chunker.get_next_non_data_with_index(clean=False)
    timestamp, chunk, start, end = self._chunker.get_next_data_with_index(clean=True)
    self.handle_non_data(non_data, non_end, start)

    # If not set from config, and the constructor raised no
    # InstrumentParameterException, fall back to the single configured class.
    if self.particle_classes is None:
        self.particle_classes = (self._particle_class,)

    while chunk:
        for particle_class in self.particle_classes:
            if hasattr(particle_class, "data_matcher"):
                self.sensor_data_matcher = particle_class.data_matcher

            # If this is a valid sensor data record,
            # use the extracted fields to generate a particle.
            sensor_match = self.sensor_data_matcher.match(chunk)
            if sensor_match is not None:
                break

        if sensor_match is not None:
            particle = self._extract_sample(particle_class,
                                            None,
                                            sensor_match.groups(),
                                            None)
            if particle is not None:
                result_particles.append((particle, None))

        # It's not a sensor data record, see if it's a metadata record.
        else:
            # If it's a valid metadata record, ignore it.
            # Otherwise generate a warning for unknown data.
            meta_match = self.metadata_matcher.match(chunk)
            if meta_match is None:
                error_message = 'Unknown data found in chunk %s' % chunk
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))

        nd_timestamp, non_data, non_start, non_end = self._chunker.get_next_non_data_with_index(clean=False)
        timestamp, chunk, start, end = self._chunker.get_next_data_with_index(clean=True)
        self.handle_non_data(non_data, non_end, start)

    return result_particles
def we_split_function(self, raw_data):
    """
    Sort through the raw data to identify new blocks of data that need processing.
    """
    form_list = []

    # The status messages can have an optional 2 bytes on the end, and since the
    # rest of the data consists of relatively unformatted packed binary records,
    # detecting the presence of those optional 2 bytes can be difficult. The only
    # pattern we have to detect is the STATUS_START field (4 bytes: FF FF FF F[A-F]).
    # We peel this apart by parsing backwards, using the end-of-record as an
    # additional anchor point.
    parse_end_point = len(raw_data)
    while parse_end_point > 0:

        # look for a status message at the postulated message header position
        header_start = STATUS_BYTES_AUGMENTED

        # look for an augmented status
        if STATUS_START_MATCHER.match(raw_data[parse_end_point - STATUS_BYTES_AUGMENTED:parse_end_point]):
            # A hit for the status message at the augmented offset.
            # NOTE: we don't need the status messages and only deliver a stream
            # of samples to build_parsed_values.
            parse_end_point = parse_end_point - STATUS_BYTES_AUGMENTED

        # check if this is an unaugmented status
        elif STATUS_START_MATCHER.match(raw_data[parse_end_point - STATUS_BYTES:parse_end_point]):
            # A hit for the status message at the unaugmented offset.
            # NOTE: same as above.
            parse_end_point = parse_end_point - STATUS_BYTES

        else:
            # Assume that anything not matching a status above is a sample.
            # Mis-parsing will result in extra bytes at the end and a sample exception.
            form_list.append((parse_end_point - E_GLOBAL_SAMPLE_BYTES, parse_end_point))
            parse_end_point = parse_end_point - E_GLOBAL_SAMPLE_BYTES

        # if the remaining bytes are less than the data sample bytes,
        # all we might have left is a status sample
        if parse_end_point != 0 and parse_end_point < STATUS_BYTES \
                and parse_end_point < E_GLOBAL_SAMPLE_BYTES \
                and parse_end_point < STATUS_BYTES_AUGMENTED:
            self._exception_callback(UnexpectedDataException(
                "Error sieving WE data, inferred sample/status alignment incorrect"))
            return_list = []
            return return_list

    # Because we parsed this backwards, we need to reverse the list
    # to deliver the data in the correct order.
    return_list = form_list[::-1]
    log.debug("returning we sieve/split list %s", return_list)
    return return_list
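# A toy illustration of the backward scan above, using made-up record sizes in
# place of the real E_GLOBAL_SAMPLE_BYTES / STATUS_BYTES constants: scanning
# from the end, every slice that is not a status record is assumed to be one
# fixed-size sample, and the index pairs are reversed into file order.
TOY_SAMPLE_BYTES = 4
TOY_STATUS_BYTES = 2

def toy_backward_sieve(raw_data, is_status):
    spans = []
    end = len(raw_data)
    while end > 0:
        if is_status(raw_data[end - TOY_STATUS_BYTES:end]):
            end -= TOY_STATUS_BYTES      # drop the status record
        else:
            spans.append((end - TOY_SAMPLE_BYTES, end))
            end -= TOY_SAMPLE_BYTES
    return spans[::-1]                   # deliver samples in file order

# usage: two 4-byte samples followed by a 2-byte status record
spans = toy_backward_sieve(b'AAAABBBB\xff\xff', lambda b: b == b'\xff\xff')
# spans == [(0, 4), (4, 8)]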
def handle_non_data(self, non_data, non_end, start):
    """
    Check for and handle any non-data that is found in the file
    """
    # If there is non-data it is an error
    if non_data is not None and non_end <= start:
        message = "Found %d bytes of un-expected non-data" % len(non_data)
        log.warn(message)
        # if non-data is a fatal error, directly call the exception;
        # if it is not, use the _exception_callback
        self._exception_callback(UnexpectedDataException(message))
def handle_non_data(self, non_data, non_end, start):
    """
    Handle any non-data that is found in the file
    """
    # Handle non-data here.
    if non_data is not None and non_end <= start:
        message = "Found %d bytes of un-expected non-data %s" % (
            len(non_data), binascii.b2a_hex(non_data))
        log.warn(message)
        self._exception_callback(UnexpectedDataException(message))
def parse_file(self):
    """
    Entry point into parsing the file. Loop over each line and interpret it
    until the entire file is parsed.
    """
    stored_start_timestamp = None

    # read the first line in the file
    line = self._stream_handle.readline()

    while line:
        # data will be at the start of the line, so use match
        data_match = DATA_START_MATCHER.match(line)
        # check whether this line starts with an instrument log message
        log_match = LOG_START_MATCHER.match(line)

        if data_match:
            # found a data line
            dcl_timestamp = data_match.group(1)
            fields = line[START_N_CHARS:].split(',')

            if len(fields) != N_FIELDS:
                msg = 'Expected %d fields but received %d' % (N_FIELDS, len(fields))
                log.warn(msg)
                self._exception_callback(SampleException(msg))
            else:
                # create an array of the fields to parse in the particle
                raw_data = [stored_start_timestamp, dcl_timestamp]
                raw_data.extend(fields)

                # extract this particle
                particle = self._extract_sample(self.particle_class, None, raw_data, None)
                self._record_buffer.append(particle)
                stored_start_timestamp = None

        elif log_match:
            # pull out whatever text is within the log
            log_contents = log_match.group(2)

            # there are two cases: a log message simply contains the
            # 'Instrument Started' text, or it contains an entire other log
            # message which may contain 'Instrument Started'
            instr_log_match = INSTRUMENT_STARTED_MATCHER.match(log_contents)
            full_log_instr_match = INSTRUMENT_START_LOG_MATCHER.match(log_contents)

            # text other than instrument started is ignored within log messages
            if instr_log_match:
                # found a line containing a single log instrument started;
                # hold on to it until we get a data line
                stored_start_timestamp = log_match.group(1)
            elif full_log_instr_match:
                # found a log within a log; use the inner timestamp associated
                # with the instrument start
                stored_start_timestamp = full_log_instr_match.group(1)

        else:
            msg = 'Data with unexpected format received: %s' % line
            log.warn(msg)
            self._exception_callback(UnexpectedDataException(msg))

        line = self._stream_handle.readline()
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker.
    If it is valid data, build a particle.
    Go until the chunker has no more valid data.
    @retval a list of tuples with sample particles encountered in this
    parsing, plus the state.
    """
    result_particles = []

    (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
    (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)
    self.handle_non_data(non_data, non_end, start)

    while chunk is not None:
        self._increment_position(len(chunk))

        # If this is a valid sensor data record,
        # use the extracted fields to generate a particle.
        sensor_match = SENSOR_DATA_MATCHER.match(chunk)
        if sensor_match is not None:
            particle = self._extract_sample(self.particle_class,
                                            None,
                                            sensor_match.groups(),
                                            None)
            if particle is not None:
                result_particles.append((particle, copy.copy(self._read_state)))

        # It's not a sensor data record, see if it's a metadata record.
        else:
            # If it appears to be a metadata record,
            # look for multiple lines which have been garbled,
            # i.e., a metadata record minus the newline
            # plus tab-separated values from a following sensor data record.
            # find returns -1 if not found.
            # Valid metadata records produce no particles and
            # are silently ignored.
            meta_match = METADATA_MATCHER.match(chunk)
            if meta_match is None or chunk.find(TAB) != -1:
                error_message = 'Unknown data found in chunk %s' % chunk
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))

        (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
        (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)
        self.handle_non_data(non_data, non_end, start)

    return result_particles
def _read_file_definition(self):
    """
    Read the first 14 lines of the data file for the file definitions; values
    are colon-delimited key/value pairs. The pairs are parsed and stored in
    the header_dict member.
    """
    row_count = 0

    # THIS METHOD ASSUMES A 14 ROW HEADER.
    # If the number of header row lines in the glider ASCII input file
    # changes from 14, this method will NOT WORK
    num_hdr_lines = 14

    header_pattern = r'(.*): (.*)$'
    header_re = re.compile(header_pattern)

    while row_count < num_hdr_lines:
        line = self._stream_handle.readline()

        # check if this line is empty
        if len(line) == 0:
            raise SampleException("GliderParser._read_file_definition(): Header line is empty")

        match = header_re.match(line)

        if match:
            key = match.group(1)
            value = match.group(2)
            value = value.strip()
            #log.debug("header key: %s, value: %s", key, value)

            if key in ['num_ascii_tags', 'num_label_lines', 'sensors_per_cycle']:
                value = int(value)
                # save these three integer key/value pairs from the
                # header rows for future use
                self._header_dict[key] = value
            elif key in ['filename_label', 'mission_name', 'fileopen_time']:
                # save these three string key/value pairs from the
                # header rows for future use
                self._header_dict[key] = value
        else:
            log.warn("Failed to parse header row: %s.", line)

        row_count += 1

    num_ascii_tags_value = self._header_dict.get('num_ascii_tags')

    if num_ascii_tags_value != num_hdr_lines:
        raise UnexpectedDataException("GliderParser._read_file_definition(): "
                                      "Header is not 14 rows, num_ascii_tags = %s" % num_ascii_tags_value)
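# For context, a hedged example of the colon-delimited header lines this
# method expects at the top of a glider ASCII file (values invented for
# illustration); header_re captures the key and value around the ': '.
import re

header_re = re.compile(r'(.*): (.*)$')
sample_header_lines = [
    'num_ascii_tags: 14',
    'sensors_per_cycle: 349',
    'mission_name: TRANS58.MI',
]
for hdr_line in sample_header_lines:
    m = header_re.match(hdr_line)
    print(m.group(1), '->', m.group(2).strip())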
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker. If it is a valid data
    piece, build a particle, update the position and timestamp. Go until the
    chunker has no more valid data.
    @retval a list of tuples with sample particles encountered in this
    parsing, plus the state. An empty list if nothing was parsed.
    """
    (timestamp, chunk) = self._chunker.get_next_data()

    while chunk is not None:
        # Parse/match the SIO header
        sio_header_match = SIO_HEADER_MATCHER.match(chunk)
        end_of_header = sio_header_match.end(0)

        if sio_header_match.group(1) == 'WE':
            # Parse/match the E file header
            e_header_match = E_HEADER_MATCHER.search(
                chunk[end_of_header:end_of_header + HEADER_BYTES])

            if e_header_match:
                # '-1' to remove the '\x03' end-of-record marker
                payload = chunk[end_of_header + HEADER_BYTES:-1]
                self._process_we_record(payload)
            else:
                # no E file header match
                message = "Found unexpected data."
                log.warn(message)
                self._exception_callback(UnexpectedDataException(message))

        else:
            # the SIO header ID is not 'WE'
            message = "Found unexpected data."
            log.warn(message)
            self._exception_callback(UnexpectedDataException(message))

        (timestamp, chunk) = self._chunker.get_next_data()

    return self._result_particles
def handle_non_data(self, non_data, non_end, start):
    """
    This method handles any non-data that is found in the file
    """
    # if non-data is expected, handle it here; otherwise it is an error
    if non_data is not None and non_end <= start:
        # this non-data is an error: send an UnexpectedDataException and increment the state
        self._increment_state(len(non_data))
        # if non-data is a fatal error, directly call the exception;
        # if it is not, use the _exception_callback
        self._exception_callback(UnexpectedDataException("Found %d bytes of un-expected non-data %s" %
                                                         (len(non_data), non_data)))
def handle_non_data(self, non_data, non_end, start):
    """
    Handle any non-data found in the file
    :param non_data: Non data
    :param non_end: end index of non data
    :param start: start index of data
    """
    if non_data is not None and non_end <= start:
        message = "Found %d bytes of unexpected non-data %s" % (len(non_data),
                                                                binascii.b2a_hex(non_data))
        log.warn(message)
        self._exception_callback(UnexpectedDataException(message))
def handle_non_data(self, non_data, non_end, start):
    """
    Handle any non-data that is found in the file
    """
    # if non-data is expected, handle it here; otherwise it is an error
    if non_data is not None and non_end <= start:
        log.debug("non_data: %s", non_data)
        # if non-data is a fatal error, directly call the exception;
        # if it is not, use the _exception_callback
        self._exception_callback(UnexpectedDataException(
            "Found %d bytes of un-expected non-data %s" % (len(non_data), non_data)))
def parse_file(self):
    """
    This method will parse a nutnr_b input file and collect the particles.
    """
    # Read the first line in the file
    line = self._stream_handle.readline()

    # While a new line in the file exists
    while line:
        first_power_line_match = FIRST_POWER_LINE_MATCHER.match(line)
        firmware_version_line_match = FIRMWARE_VERSION_LINE_MATCHER.match(line)
        ignore_match = IGNORE_MATCHER.match(line)
        instrument_line_match = INSTRUMENT_LINE_MATCHER.match(line)

        if ignore_match:
            log.debug("Found ignore match. Line: %s", line)

        elif first_power_line_match:
            log.debug("Found match. Line: %s", line)
            self._process_startup_time_match(first_power_line_match)

        elif firmware_version_line_match:
            log.debug("Found match. Line: %s", line)
            self._process_firmware_version_line_match(firmware_version_line_match)

        elif instrument_line_match:
            log.debug("Found match. Line: %s", line)
            self._process_instrument_line_match(instrument_line_match)

        # OK. We found a line in the file we were not expecting. Let's log a
        # warning and report an unexpected data exception.
        else:
            # If we did not get a match against part of an instrument
            # data record, we may have a bad file
            message = "Unexpected data in file, line: " + line
            log.warn(message)
            self._exception_callback(UnexpectedDataException(message))

        # Read the next line in the file
        line = self._stream_handle.readline()

    # Set an indication that the file was fully parsed
    self._file_parsed = True
def handle_non_data(self, non_data, non_end, start):
    """
    Handle any non-data that is found in the file
    """
    # Handle non-data here.
    # Increment the position within the file.
    # Use the _exception_callback.
    if non_data is not None and non_end <= start:
        self._increment_position(len(non_data))
        self._exception_callback(UnexpectedDataException(
            "Found %d bytes of un-expected non-data %s" % (len(non_data), non_data)))
def handle_non_data(self, non_data, non_end, start):
    """
    Handle any non-data that is found in the file
    """
    # rx failure matches are expected non-data; otherwise it is an error
    if non_data is not None and non_end <= start:
        # this non-data is an error: send an UnexpectedDataException and increment the state
        self._increment_state(len(non_data))
        log.debug("Found %d bytes of unexpected non-data", len(non_data))
        # if non-data is a fatal error, directly call the exception;
        # if it is not, use the _exception_callback
        self._exception_callback(UnexpectedDataException("Found %d bytes of un-expected non-data %s" %
                                                         (len(non_data), non_data)))
def handle_non_data(self, non_data, non_end, start):
    """
    Handle any non-data that is found in the file
    """
    # Handle non-data here.
    if non_data is not None and non_end <= start:
        # send an UnexpectedDataException and increment the state
        self._increment_state(len(non_data))
        self._exception_callback(UnexpectedDataException(
            "Found %d bytes of un-expected non-data %s" % (len(non_data),
                                                           binascii.b2a_hex(non_data))))
def handle_non_data(self, non_data, non_end, start):
    """
    Handle data in the non_data chunker queue
    @param non_data data in the non data chunker queue
    @param non_end ending index of the non_data chunk
    @param start start index of the next data chunk
    """
    # we can get non_data after our current chunk; check that this non-data
    # chunk is before the current data chunk
    if non_data is not None and non_end <= start:
        log.error("Found %d bytes of unexpected non-data:%s", len(non_data), non_data)
        self._exception_callback(UnexpectedDataException("Found %d bytes of un-expected non-data:%s" %
                                                         (len(non_data), non_data)))
        self._increment_state(len(non_data))
def handle_non_data(self, non_data, non_end, start):
    """
    Handle any non-data that is found in the file
    """
    # non-data is not expected; if found, it is an error
    if non_data is not None and non_end <= start:
        log.debug("non_data: %s", non_data)
        # send an UnexpectedDataException and increment the state
        self._exception_callback(UnexpectedDataException(
            "Found %d bytes of un-expected non-data %s" % (len(non_data), non_data)))
def _handle_non_match(self, line):
    # Check for other lines that can be ignored
    if (re.match(LOGGING_REGEX, line)
            or re.match(HEADER_BEGIN_REGEX, line)
            or re.match(DATA_FORMAT_REGEX, line)
            or re.match(DATA_BEGIN_REGEX, line)
            or re.match(DATA_END_REGEX, line)):
        log.debug("Ignoring line: %s", line)
    else:
        # Exception callback
        message = "Unexpected data found. Line: " + line
        log.warn(message)
        self._exception_callback(UnexpectedDataException(message))
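# The bare re.match(PATTERN, line) calls above re-look-up each pattern in the
# re module's internal cache on every line. A minimal alternative, assuming
# the same *_REGEX constants, is to precompile the ignorable patterns once;
# this also keeps the set of "lines we ignore" explicit in one place.
IGNORABLE_MATCHERS = [re.compile(p) for p in (LOGGING_REGEX,
                                              HEADER_BEGIN_REGEX,
                                              DATA_FORMAT_REGEX,
                                              DATA_BEGIN_REGEX,
                                              DATA_END_REGEX)]

def is_ignorable(line):
    return any(matcher.match(line) for matcher in IGNORABLE_MATCHERS)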
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker.
    If it is valid data, build a particle.
    Go until the chunker has no more valid data.
    @retval a list of tuples with sample particles encountered in this
    parsing, plus the state.
    """
    result_particles = []

    (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
    (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)
    self.handle_non_data(non_data, non_end, start)

    while chunk is not None:
        self._increment_position(len(chunk))

        # If this is a valid sensor data record,
        # use the extracted fields to generate a particle.
        sensor_match = SENSOR_DATA_MATCHER.match(chunk)
        if sensor_match is not None:
            particle = self._extract_sample(self.particle_class,
                                            None,
                                            sensor_match.groups(),
                                            None)
            if particle is not None:
                result_particles.append((particle, copy.copy(self._read_state)))

        # It's not a sensor data record, see if it's a metadata record.
        else:
            # If it's a valid metadata record, ignore it.
            # Otherwise generate a warning for unknown data.
            meta_match = METADATA_MATCHER.match(chunk)
            if meta_match is None:
                error_message = 'Unknown data found in chunk %s' % chunk
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))

        (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
        (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True)
        self.handle_non_data(non_data, non_end, start)

    return result_particles
def parse_chunks(self):
    """
    Parse out any pending data chunks in the chunker. If it is a valid data
    piece, build a particle, update the position and timestamp. Go until the
    chunker has no more valid data.
    @retval a list of tuples with sample particles encountered in this
    parsing, plus the state. An empty list if nothing was parsed.
    """
    result_particles = []

    (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
    (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index()
    self.handle_non_data(non_data, non_end, start)

    while chunk is not None:
        header_match = SIO_HEADER_MATCHER.match(chunk)

        if header_match.group(1) == 'FL':
            data_match = DATA_MATCHER.search(chunk)
            if data_match:
                log.debug('Found data match in chunk %s', chunk[1:32])

                # particle-ize the data block received, return the record;
                # prepend the timestamp from the sio mule header to the flort
                # raw data, which is stored in header_match.group(3)
                sample = self._extract_sample(self._particle_class, None,
                                              header_match.group(3) + data_match.group(0),
                                              preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                if sample:
                    # create particle
                    result_particles.append(sample)
        else:
            # We found a header ID in the file that was unexpected.
            # Since we are continuing, just log a warning.
            warn_str = "Unexpected data in beginning of header: "
            log.warn(warn_str + "%s", header_match.group(1))
            message = warn_str + header_match.group(1)
            self._exception_callback(UnexpectedDataException(message))

        (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False)
        (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index()
        self.handle_non_data(non_data, non_end, start)

    return result_particles