def parse_file(self):
    """
    Read the input stream line by line, classifying each line as a data
    record, a header part, or neither, and dispatch it accordingly.
    Valid data records are turned into particles appended to
    self._record_buffer; invalid near-matches are reported through the
    exception callback.
    """
    for record in self._stream_handle:
        begin_match = BEGIN_MATCHER.match(record)

        if begin_match is None:
            # Not the start of a data record: try the header pattern,
            # otherwise hand the line to the catch-all handler.
            header_match = HEADER_PART_MATCHER.match(record)
            if header_match is not None:
                self._process_header_part_match(header_match)
            else:
                self._process_line_not_containing_data_record_or_header_part(record)
            continue

        # The wavelength count dictates the full record layout, so the
        # complete regex must be built per line.
        num_wavelengths = begin_match.group(DataMatchesGroupNumber.NUM_WAVELENGTHS)
        full_regex = self._build_data_regex(BEGIN_REGEX, num_wavelengths)
        data_match = re.match(full_regex, record)

        if data_match is not None:
            self._process_data_match(data_match, self._record_buffer)
        else:
            # Line matched the beginning pattern but not the full record.
            log.warn("line did not match regex %s", record)
            self._exception_callback(
                RecoverableSampleException("Found an invalid line: %s" % record))
def parse_file(self):
    """
    Iterate over every line of the input stream, emitting particles into
    self._record_buffer for valid data records and routing header lines
    to the header handler. Anything else is delegated to the
    non-record/non-header handler.
    """

    def handle_data_line(line, start_match):
        # Build the complete record regex from the wavelength count,
        # then validate the whole line against it.
        wavelength_count = start_match.group(DataMatchesGroupNumber.NUM_WAVELENGTHS)
        full_match = re.match(self._build_data_regex(BEGIN_REGEX, wavelength_count), line)
        if full_match is None:
            # Beginning pattern matched, but the full record did not.
            log.warn("line did not match regex %s", line)
            self._exception_callback(
                RecoverableSampleException(
                    "Found an invalid line: %s" % line))
        else:
            self._process_data_match(full_match, self._record_buffer)

    for line in self._stream_handle:
        start_match = BEGIN_MATCHER.match(line)
        if start_match is not None:
            handle_data_line(line, start_match)
        else:
            # Not a data record; check for a header part.
            header_match = HEADER_PART_MATCHER.match(line)
            if header_match is None:
                self._process_line_not_containing_data_record_or_header_part(line)
            else:
                self._process_header_part_match(header_match)
def parse_file(self):
    """
    Parse the input stream line by line, generating particles for
    battery status and GPS adjustment records, processing header part
    lines, and reporting unrecognized content through the exception
    callback as RecoverableSampleExceptions.
    """
    for line in self._stream_handle:
        battery_match = BATTERY_DATA_MATCHER.match(line)
        gps_match = GPS_DATA_MATCHER.match(line)

        # If we found a data match, let's process it
        if battery_match is not None:
            self._process_data_match(self._battery_status_class, battery_match)
        elif gps_match is not None:
            self._process_data_match(self._gps_adjustment_class, gps_match)
        else:
            # Check for header part match
            header_part_match = HEADER_PART_MATCHER.match(line)
            if header_part_match is not None:
                self._process_header_part_match(header_part_match)
            elif HEX_ASCII_LINE_MATCHER.match(line):
                # Hex-ascii lines get the catch-all line handler.
                self._process_line_not_containing_data_record_or_header_part(line)
            # Ignore the expected timestamp line and any line matching the
            # ignore regex (when one is configured); anything else is
            # unexpected and reported.
            elif not TIMESTAMP_LINE_MATCHER.match(line) and not \
                    (IGNORE_MATCHER is not None and IGNORE_MATCHER.match(line)):
                log.warn("non_data: %s", line)
                # Bug fix: the previous message "Found d bytes" was a
                # broken format string that reported nothing useful;
                # include the offending line, matching sibling parsers.
                self._exception_callback(
                    RecoverableSampleException(
                        "Found an invalid line: %s" % line))
def parse_chunks(self):
    """
    Pull all pending data chunks from the chunker, building a particle
    for each valid data record and routing header parts to the header
    handler.

    @retval a list of tuples with sample particles encountered in this
        parsing. An empty list is returned if nothing was parsed.
    """
    particles = []

    def advance():
        # Pull the next non-data span and data chunk from the chunker,
        # then hand the non-data off for reporting before the data chunk
        # is processed.
        (nd_ts, non_data, nd_start, nd_end) = \
            self._chunker.get_next_non_data_with_index(clean=False)
        (ts, data, data_start, data_end) = \
            self._chunker.get_next_data_with_index(clean=True)
        self.handle_non_data(non_data, nd_end, data_start)
        return data

    chunk = advance()
    while chunk is not None:
        begin_match = BEGIN_MATCHER.match(chunk)
        if begin_match is None:
            # Not a data record: try the header pattern, otherwise use
            # the catch-all chunk handler.
            header_match = HEADER_PART_MATCHER.match(chunk)
            if header_match is not None:
                self._process_header_part_match(header_match)
            else:
                self._process_chunk_not_containing_data_record_or_header_part(chunk)
        else:
            # The wavelength count dictates the full record layout, so
            # the complete regex is built per chunk.
            wavelengths = begin_match.group(DataMatchesGroupNumber.NUM_WAVELENGTHS)
            full_match = re.match(self._build_data_regex(BEGIN_REGEX, wavelengths), chunk)
            if full_match is not None:
                self._process_data_match(full_match, particles)
            else:
                # Beginning pattern matched, but the full record did not.
                log.warn("chunk did not match regex %s", chunk)
                self._exception_callback(
                    RecoverableSampleException("Found an invalid chunk: %s" % chunk))
        chunk = advance()

    return particles
def parse_chunks(self): """ Parse out any pending data chunks in the chunker. If it is a valid data piece, build a particle, update the position and timestamp. Go until the chunker has no more valid data. @retval a list of tuples with sample particles encountered in this parsing, plus the state. An empty list of nothing was parsed. """ # Initialize the result particles list we will return result_particles = [] # Retrieve the next non data chunk (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False) # Retrieve the next data chunk (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True) # Process the non data self.handle_non_data(non_data, non_end, start) # While the data chunk is not None, process the data chunk while chunk is not None: # Increment the read state position now self._increment_read_state(len(chunk)) battery_match = BATTERY_DATA_MATCHER.match(chunk) gps_match = GPS_DATA_MATCHER.match(chunk) # If we found a data match, let's process it if battery_match is not None: self._process_data_match(self._battery_status_class, battery_match, result_particles) elif gps_match is not None: self._process_data_match(self._gps_adjustment_class, gps_match, result_particles) else: # Check for head part match header_part_match = HEADER_PART_MATCHER.match(chunk) if header_part_match is not None: self._process_header_part_match(header_part_match) else: self._process_chunk_not_containing_data_record_or_header_part(chunk) # Retrieve the next non data chunk (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False) # Retrieve the next data chunk (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True) # Process the non data self.handle_non_data(non_data, non_end, start) return result_particles
def parse_file(self):
    """
    Parse NUTNR J CSPP text file.

    Each line is matched against the data record pattern first, then the
    header-part pattern; recognized header values are stored in
    self._header_state. Corrupted (hex-ascii) or unrecognized lines are
    reported through the exception callback as
    RecoverableSampleExceptions.
    """
    # Loop over all lines in the data file, generating particles for data lines.
    for line in self._stream_handle:

        # NOTE(review): DATA_MATER looks like a typo for DATA_MATCHER —
        # confirm against this module's compiled pattern constants.
        data_match = DATA_MATER.match(line)

        # If we found a data match, let's process it
        if data_match is not None:
            self._process_data_match(data_match)
        else:
            # Check for header part match
            header_part_match = HEADER_PART_MATCHER.match(line)
            if header_part_match is not None:
                header_part_key = header_part_match.group(
                    HeaderPartMatchesGroupNumber.HEADER_PART_MATCH_GROUP_KEY)
                header_part_value = header_part_match.group(
                    HeaderPartMatchesGroupNumber.HEADER_PART_MATCH_GROUP_VALUE)

                # Only header keys pre-registered in the header state are
                # retained; trailing whitespace is stripped. (str.rstrip()
                # replaces string.rstrip(), which was removed in Python 3;
                # behavior is identical.)
                if header_part_key in self._header_state:
                    self._header_state[header_part_key] = header_part_value.rstrip()

            elif HEX_ASCII_LINE_MATCHER.match(line):
                # We found a line starting with the timestamp, depth, and
                # suspect timestamp, followed by all hex ascii chars.
                log.warn('got hex ascii corrupted data %s ', line)
                self._exception_callback(RecoverableSampleException(
                    "Found hex ascii corrupted data: %s" % line))

            # Ignore the expected timestamp line and any lines matching the
            # ignore regex; otherwise data is unexpected.
            elif not TIMESTAMP_LINE_MATCHER.match(line) and not \
                    (IGNORE_MATCHER is not None and IGNORE_MATCHER.match(line)):
                # Unexpected data was found
                log.warn('got unrecognized row %s', line)
                self._exception_callback(RecoverableSampleException(
                    "Found an invalid chunk: %s" % line))
def parse_chunks(self): """ Parse out any pending data chunks in the chunker. If it is a valid data piece, build a particle, update the position and timestamp. Go until the chunker has no more valid data. @retval a list of tuples with sample particles encountered in this parsing, plus the state. An empty list of nothing was parsed. """ # Initialize the result particles list we will return result_particles = [] # Retrieve the next non data chunk (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False) # Retrieve the next data chunk (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True) # Process the non data self.handle_non_data(non_data, non_end, start) # While the data chunk is not None, process the data chunk while chunk is not None: # Increment the read state position now self._increment_read_state(len(chunk)) battery_match = BATTERY_DATA_MATCHER.match(chunk) gps_match = GPS_DATA_MATCHER.match(chunk) # If we found a data match, let's process it if battery_match is not None: self._process_data_match(self._battery_status_class, battery_match, result_particles) elif gps_match is not None: self._process_data_match(self._gps_adjustment_class, gps_match, result_particles) else: # Check for head part match header_part_match = HEADER_PART_MATCHER.match(chunk) if header_part_match is not None: self._process_header_part_match(header_part_match) else: self._process_chunk_not_containing_data_record_or_header_part( chunk) # Retrieve the next non data chunk (nd_timestamp, non_data, non_start, non_end) = self._chunker.get_next_non_data_with_index(clean=False) # Retrieve the next data chunk (timestamp, chunk, start, end) = self._chunker.get_next_data_with_index(clean=True) # Process the non data self.handle_non_data(non_data, non_end, start) return result_particles
def parse_chunks(self):
    """
    Drain the chunker of pending data, building a particle for each
    valid data record found. Non-data spans are reported before each
    data chunk is processed.

    @retval a list of tuples with sample particles encountered in this
        parsing. An empty list is returned if nothing was parsed.
    """
    result_particles = []

    while True:
        # Fetch the next non-data span and data chunk, and report the
        # non-data before handling the data chunk.
        (nd_timestamp, non_data, non_start, non_end) = \
            self._chunker.get_next_non_data_with_index(clean=False)
        (timestamp, chunk, start, end) = \
            self._chunker.get_next_data_with_index(clean=True)
        self.handle_non_data(non_data, non_end, start)

        if chunk is None:
            # Chunker exhausted.
            break

        begin_match = BEGIN_MATCHER.match(chunk)
        if begin_match is None:
            # Not a data record: try the header pattern, otherwise use
            # the catch-all chunk handler.
            header_match = HEADER_PART_MATCHER.match(chunk)
            if header_match is None:
                self._process_chunk_not_containing_data_record_or_header_part(
                    chunk)
            else:
                self._process_header_part_match(header_match)
            continue

        # The wavelength count dictates the full record layout, so the
        # complete regex must be built per chunk.
        wavelength_count = begin_match.group(DataMatchesGroupNumber.NUM_WAVELENGTHS)
        record_regex = self._build_data_regex(BEGIN_REGEX, wavelength_count)
        record_match = re.match(record_regex, chunk)

        if record_match is None:
            # Beginning pattern matched, but the full record did not.
            log.warn("chunk did not match regex %s", chunk)
            self._exception_callback(
                RecoverableSampleException(
                    "Found an invalid chunk: %s" % chunk))
        else:
            self._process_data_match(record_match, result_particles)

    return result_particles