Example #1
    def __init__(self,
                 raw_data,
                 port_timestamp=None,
                 internal_timestamp=None,
                 preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
                 quality_flag=DataParticleValue.OK,
                 new_sequence=None):
        super(PhsenParserDataParticle, self).__init__(
            raw_data,
            port_timestamp=port_timestamp,
            internal_timestamp=internal_timestamp,
            preferred_timestamp=preferred_timestamp,
            quality_flag=quality_flag,
            new_sequence=new_sequence)
        timestamp_match = TIMESTAMP_MATCHER.match(self.raw_data[:8])
        if not timestamp_match:
            raise RecoverableSampleException("PhsenParserDataParticle: No regex match of " \
                                             "timestamp [%s]" % self.raw_data[:8])
        self._data_match = DATA_MATCHER.match(self.raw_data[8:])
        if not self._data_match:
            raise RecoverableSampleException("PhsenParserDataParticle: No regex match of " \
                                             "parsed sample data [%s]" % self.raw_data[8:])

        # use the timestamp from the sio header as internal timestamp
        sec_since_1970 = int(self.raw_data[:8], 16)
        self.set_internal_timestamp(unix_time=sec_since_1970)
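
The 8 characters stripped off the front of raw_data are the SIO header timestamp: hexadecimal seconds since the Unix epoch. A minimal sketch of that decoding step, using an assumed 8-hex-digit pattern in place of the driver's TIMESTAMP_MATCHER and a fabricated prefix value:

    import re
    from datetime import datetime

    # assumed pattern; the real TIMESTAMP_MATCHER is defined in the driver module
    TIMESTAMP_MATCHER = re.compile(r'[0-9A-Fa-f]{8}')

    sio_prefix = '51EC763A'  # hypothetical SIO header timestamp
    if TIMESTAMP_MATCHER.match(sio_prefix):
        sec_since_1970 = int(sio_prefix, 16)
        print(sec_since_1970)                             # 1374451258
        print(datetime.utcfromtimestamp(sec_since_1970))  # 2013-07-22 00:00:58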
Example #2
    def __init__(self,
                 raw_data,
                 port_timestamp=None,
                 internal_timestamp=None,
                 preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
                 quality_flag=DataParticleValue.OK,
                 new_sequence=None):
        super(FlortdCommonParserDataParticle,
              self).__init__(raw_data, port_timestamp, internal_timestamp,
                             preferred_timestamp, quality_flag, new_sequence)
        # the raw data has the timestamp from the sio header pre-pended to it, match the first 8 bytes
        timestamp_match = TIMESTAMP_MATCHER.match(self.raw_data[:8])
        if not timestamp_match:
            raise RecoverableSampleException(
                "FlortdParserDataParticle: No regex match of timestamp [%s]" %
                self.raw_data[:8])
        # now match the flort data, excluding the sio header timestamp in the first 8 bytes
        self._data_match = DATA_MATCHER.match(self.raw_data[8:])
        if not self._data_match:
            raise RecoverableSampleException(
                "FlortdParserDataParticle: No regex match of "
                "parsed sample data [%s]" % self.raw_data[8:])

        # use the timestamp from the sio header as internal timestamp
        sec_since_1970 = int(self.raw_data[:8], 16)
        self.set_internal_timestamp(unix_time=sec_since_1970)
Example #3
    def __init__(self,
                 raw_data,
                 port_timestamp=None,
                 internal_timestamp=None,
                 preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
                 quality_flag=DataParticleValue.OK,
                 new_sequence=None):
        super(DostadParserDataParticle, self).__init__(
            raw_data,
            port_timestamp=port_timestamp,
            internal_timestamp=internal_timestamp,
            preferred_timestamp=preferred_timestamp,
            quality_flag=quality_flag,
            new_sequence=new_sequence)
        # the raw data has the timestamp from the sio header pre-pended to it, match the first 8 bytes
        timestamp_match = TIMESTAMP_MATCHER.match(self.raw_data[:8])
        if not timestamp_match:
            raise RecoverableSampleException("DostaParserDataParticle: No regex match of " \
                                             "timestamp [%s]" % self.raw_data[:8])
        # now match the dosta data, excluding the sio header timestamp in the first 8 bytes
        self._data_match = DATA_MATCHER.match(self.raw_data[8:])
        if not self._data_match:
            raise RecoverableSampleException("DostaParserDataParticle: No regex match of " \
                                              "parsed sample data [%s]" % self.raw_data[8:])

        posix_time = int(timestamp_match.group(0), 16)
        self.set_internal_timestamp(unix_time=float(posix_time))
Example #4
    def _process_data_match(self, data_match):
        """
        This method processes a data match.  It will extract a metadata particle and insert it into
        the record_buffer when we have not already extracted the metadata and all header values exist.
        This method will also extract a data particle and append it to the record_buffer.
        @param data_match A regular expression match object for a cspp data record
        """

        params = data_match.group(0).split('\t')
        frame_type = params[3]
        data_particle = None

        # Extract the data record particle
        if frame_type == 'SLB':  # light frame
            data_particle = self._extract_sample(self._light_particle_class,
                                                 None,
                                                 data_match)
        elif frame_type == 'SDB':  # dark frame
            data_particle = self._extract_sample(self._dark_particle_class,
                                                 None,
                                                 data_match)
        else:
            log.warn('got invalid frame type %s', frame_type)
            self._exception_callback(RecoverableSampleException('got invalid frame type %s' % frame_type))

        # If we created a data particle, let's append the particle to the result particles
        # to return and increment the state data positioning
        if data_particle:

            if not self._metadata_extracted:
                # Once the first data particle is read, all available header lines will
                # have been read and inserted into the header state dictionary.
                # Only the source file is required to create a metadata particle.

                if self._header_state[DefaultHeaderKey.SOURCE_FILE] is not None:
                    metadata_particle = self._extract_sample(self._metadata_particle_class,
                                                             None,
                                                             (copy.copy(self._header_state),
                                                              data_match))
                    if metadata_particle:
                        # We're going to insert the metadata particle so that it is
                        # the first in the list and set the position to 0, as it cannot
                        # have the same position as the non-metadata particle
                        self._record_buffer.insert(0, metadata_particle)
                    else:
                        # metadata particle was not created successfully
                        log.warn('Unable to create metadata particle')
                        self._exception_callback(RecoverableSampleException(
                            'Unable to create metadata particle'))
                else:
                    # no source file path, don't create metadata particle
                    log.warn('No source file, not creating metadata particle')
                    self._exception_callback(RecoverableSampleException(
                        'No source file, not creating metadata particle'))

                # need to set metadata extracted to true so we don't keep creating
                # the metadata, even if it failed
                self._metadata_extracted = True

            self._record_buffer.append(data_particle)
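
All of these parsers report recoverable problems through an exception callback instead of raising, so one bad record is flagged and skipped while parsing continues. A self-contained sketch of that convention, with hypothetical names and a stand-in exception class:

    class RecoverableSampleException(Exception):
        """Stand-in for the driver exception class used throughout these examples."""

    class SketchParser(object):
        """Hypothetical parser illustrating the exception-callback convention."""

        def __init__(self, exception_callback):
            self._exception_callback = exception_callback
            self._record_buffer = []

        def parse_line(self, line):
            if not line.strip().isdigit():  # stand-in for a real regex check
                # report the problem through the callback and keep parsing
                self._exception_callback(
                    RecoverableSampleException('unparseable line [%s]' % line))
                return
            self._record_buffer.append(int(line))

    errors = []
    parser = SketchParser(exception_callback=errors.append)
    parser.parse_line('42')
    parser.parse_line('bad data')  # collected in errors; parsing continues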
Example #5
    def _build_parsed_values(self):
        """
        Encode the common and bands parameters from the raw data using the particle maps, and extract the non
        directional psd array
        """
        particle_parameters = super(WavssADclNonDirectionalDataParticle,
                                    self)._build_parsed_values()

        band_len = len(self.band_parameter_types)
        if len(self.payload) < (band_len + 2):
            raise RecoverableSampleException(
                'missing bands particle map header data')

        bands_header = self.payload[:band_len]
        psd_payload = self.payload[band_len:]
        num_bands = int(self.payload[0])

        expected_payload_len = band_len + num_bands
        if len(self.payload) != expected_payload_len:
            raise RecoverableSampleException(
                'unexpected number of non-directional parameters (got %d, expected %d)'
                % (len(self.payload), expected_payload_len))

        # append the band description parameters
        for value, (name, ptype) in zip(bands_header,
                                        self.band_parameter_types):
            particle_parameters.append(self._encode_value(name, value, ptype))

        # append the non-directional PSD array, from the end of the frequency spacing group to the last floating
        # point match
        particle_parameters.append(
            self._encode_value(ArrayParticleKeys.PSD_NON_DIRECTIONAL,
                               psd_payload, list_encoder_factory(float)))

        return particle_parameters
Example #6
    def _build_parsed_values(self):
        """
        Set the timestamp and encode the common particles from the raw data using COMMON_PARTICLE_MAP
        """
        utc_time, self.dcl_data, checksum = self.extract_dcl_parts(
            self.raw_data)
        if utc_time:
            # DCL controller timestamp  is the port_timestamp
            self.set_port_timestamp(unix_time=utc_time)

        if not self.dcl_data:
            raise RecoverableSampleException('Missing DCL data segment')

        if not checksum or checksum != self.compute_checksum(self.dcl_data):
            self.contents[DataParticleKey.QUALITY_FLAG] = \
                DataParticleValue.CHECKSUM_FAILED

        csv = self.dcl_data.split(',')
        if len(csv) < 7:
            raise RecoverableSampleException(
                'DCL format error: missing items from common wavss header')
        self.marker, self.date, self.time, self.serial_number, self.buoy_id, \
            self.latitude, self.longitude = csv[:7]

        # Instrument timestamp  is the internal_timestamp
        instrument_timestamp = timestamp_yyyymmddhhmmss_to_ntp(self.date +
                                                               self.time)
        self.set_internal_timestamp(instrument_timestamp)

        self.payload = csv[7:]

        return [self._encode_value('serial_number', self.serial_number, str)]
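
Note the two timestamps: the DCL controller time becomes the port_timestamp, while the instrument's own date and time fields become the internal_timestamp. A sketch of the conversion that timestamp_yyyymmddhhmmss_to_ntp presumably performs, built from calendar.timegm and ntplib as in the Winch CSPP example further down:

    import calendar
    from datetime import datetime

    import ntplib

    def yyyymmddhhmmss_to_ntp(date_time_str):
        """Sketch only: parse 'YYYYMMDDHHMMSS' as UTC and convert to an NTP timestamp."""
        dt = datetime.strptime(date_time_str, '%Y%m%d%H%M%S')
        unix_time = calendar.timegm(dt.timetuple())     # seconds since 1970, UTC
        return ntplib.system_to_ntp_time(unix_time)     # NTP epoch is 1900, not 1970

    # hypothetical wavss date and time fields
    print(yyyymmddhhmmss_to_ntp('20140817' + '005710'))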
Example #7
    def _process_chunk_not_containing_data_record_or_header_part(self, chunk):
        """
        This method processes a chunk that does not contain a data record or header.  This case is
        not applicable to "non_data".  For cspp file streams, we expect some lines in the file that
        we do not care about, and we will not consider them "non_data".
        @param chunk A regular expression match object for a cspp header row
        """

        if HEX_ASCII_LINE_MATCHER.match(chunk):
            # we found a line starting with the timestamp, depth, and
            # suspect timestamp, followed by all hex ascii chars
            log.warn('got hex ascii corrupted data %s at position %s', chunk,
                     self._read_state[StateKey.POSITION])
            self._exception_callback(
                RecoverableSampleException(
                    "Found hex ascii corrupted data: %s" % chunk))

        # ignore the expected timestamp line and any lines matching the ignore regex,
        # otherwise data is unexpected
        elif not TIMESTAMP_LINE_MATCHER.match(chunk) and \
                not (self._ignore_matcher is not None and self._ignore_matcher.match(chunk)):
            # Unexpected data was found
            log.warn('got unrecognized row %s at position %s', chunk,
                     self._read_state[StateKey.POSITION])
            self._exception_callback(
                RecoverableSampleException("Found an invalid chunk: %s" %
                                           chunk))
Example #8
    def parse_file(self):
        """
        Entry point into parsing the file
        Loop through the file one ensemble at a time
        """

        position = 0  # set position to beginning of file
        header_id_bytes = self._stream_handle.read(2)  # read the first two bytes of the file

        while header_id_bytes:  # will be None when EOF is found

            if header_id_bytes == ADCPS_PD0_HEADER_REGEX:

                # get the ensemble size from the next 2 bytes (excludes checksum bytes)
                num_bytes = struct.unpack("<H", self._stream_handle.read(2))[0]

                self._stream_handle.seek(position)  # reset to beginning of ensemble
                input_buffer = self._stream_handle.read(num_bytes + 2)  # read entire ensemble

                if len(input_buffer) == num_bytes + 2:  # make sure there are enough bytes including checksum

                    try:
                        pd0 = AdcpPd0Record(input_buffer, glider=self._glider)

                        velocity = self._particle_classes['velocity'](pd0)
                        self._record_buffer.append(velocity)

                        config = self._particle_classes['config'](pd0)
                        engineering = self._particle_classes['engineering'](pd0)

                        for particle in [config, engineering]:
                            if self._changed(particle):
                                self._record_buffer.append(particle)

                        if hasattr(pd0, 'bottom_track'):
                            bt = self._particle_classes['bottom_track'](pd0)
                            bt_config = self._particle_classes['bottom_track_config'](pd0)
                            self._record_buffer.append(bt)

                            if self._changed(bt_config):
                                self._record_buffer.append(bt_config)

                    except PD0ParsingException:
                        # seek to just past this header match
                        # self._stream_handle.seek(position + 2)
                        self._exception_callback(RecoverableSampleException("Exception parsing PD0"))

                else:  # reached EOF
                    log.warn("not enough bytes left for complete ensemble")
                    self._exception_callback(UnexpectedDataException("Found incomplete ensemble at end of file"))

            else:  # did not get header ID bytes
                log.warn('did not find header ID bytes')
                self._exception_callback(RecoverableSampleException(
                    "Did not find Header ID bytes where expected, trying next 2 bytes"))

            position = self._stream_handle.tell()  # set the new file position
            header_id_bytes = self._stream_handle.read(2)  # read the next two bytes of the file
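
parse_file above frames each ensemble by reading a 2-byte header ID and then a 2-byte little-endian length that excludes the trailing checksum. A small sketch of that framing with a fabricated buffer (PD0 ensembles conventionally start with the bytes 7F 7F; the driver compares against its own ADCPS_PD0_HEADER_REGEX constant):

    import struct

    # fabricated 4-byte prefix: assumed 0x7F 0x7F header ID plus a length field of 10
    prefix = b'\x7f\x7f' + struct.pack('<H', 10)

    header_id = prefix[:2]
    num_bytes = struct.unpack('<H', prefix[2:4])[0]  # ensemble size, checksum excluded
    total_to_read = num_bytes + 2                    # add the 2 checksum bytes
    print('%d %d' % (num_bytes, total_to_read))      # 10 12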
Example #9
    def _process_data_match(self, particle_class, data_match,
                            result_particles):
        """
        This method processes a data match.  It will extract a metadata particle and insert it into
         result_particles when we have not already extracted the metadata and all header values exist.
         This method will also extract a data particle and append it to the result_particles.
        @param particle_class is the class of particle to be created
        @param data_match A regular expression match object for a cspp data record
        @param result_particles A list which should be updated to include any particles extracted
        """

        # Extract the data record particle
        data_particle = self._extract_sample(particle_class, None, data_match,
                                             None)

        # If we created a data particle, let's append the particle to the result particles
        # to return and increment the state data positioning
        if data_particle:

            if not self._read_state[StateKey.METADATA_EXTRACTED]:
                # once the first data particle is read, all header lines should have
                # also been read

                # Source File is the only part of the header that is required
                if self._header_state[
                        DefaultHeaderKey.SOURCE_FILE] is not None:
                    metadata_particle = self._extract_sample(
                        self._metadata_particle_class, None,
                        (copy.copy(self._header_state), data_match), None)
                    if metadata_particle:
                        # We're going to insert the metadata particle so that it is
                        # the first in the list and set the position to 0, as it cannot
                        # have the same position as the non-metadata particle
                        result_particles.insert(
                            0, (metadata_particle, {
                                StateKey.POSITION: 0,
                                StateKey.METADATA_EXTRACTED: True
                            }))
                    else:
                        # metadata particle was not created successfully
                        log.warn('Unable to create metadata particle')
                        self._exception_callback(
                            RecoverableSampleException(
                                'Unable to create metadata particle'))
                else:
                    # no source file path, don't create metadata particle
                    log.warn('No source file, not creating metadata particle')
                    self._exception_callback(
                        RecoverableSampleException(
                            'No source file, not creating metadata particle'))

                # need to set metadata extracted to true so we don't keep creating
                # the metadata, even if it failed
                self._read_state[StateKey.METADATA_EXTRACTED] = True

            result_particles.append(
                (data_particle, copy.copy(self._read_state)))
Example #10
    def _build_parsed_values(self):
        """
        Encode the common and bands parameters from the raw data using the particle maps, and extract the 3 mean
        directional arrays
        """
        particle_parameters = super(WavssADclMeanDirectionalDataParticle,
                                    self)._build_parsed_values()

        band_len = len(self.band_parameter_types)
        if len(self.payload) < (band_len + 2):
            raise RecoverableSampleException(
                'missing bands particle map header data')

        bands_header = self.payload[:band_len]
        num_bands = int(self.payload[0])

        expected_payload_len = band_len + num_bands * 3 + 2
        if len(self.payload) != expected_payload_len:
            raise RecoverableSampleException(
                'unexpected number of mean-directional parameters (got %d, expected %d)'
                % (len(self.payload), expected_payload_len))

        # append the band description parameters
        for value, (name, ptype) in zip(bands_header,
                                        self.band_parameter_types):
            particle_parameters.append(self._encode_value(name, value, ptype))

        # append the mean directional specific parameters
        mean_header = self.payload[band_len:]

        for value, (name, ptype) in zip(mean_header, self.parameter_types):
            particle_parameters.append(self._encode_value(name, value, ptype))

        # split the spectra into 3 arrays, each num_bands long, by taking every 3rd item;
        # the overall array size is checked in the wavss parser
        spectra_payload = self.payload[band_len + 2:]
        psd = spectra_payload[0:num_bands * 3:3]
        mean_dir = spectra_payload[1:num_bands * 3:3]
        dir_spread = spectra_payload[2:num_bands * 3:3]

        # to match with non-directional data, the mean directional arrays must be padded with NaNs so they are
        # the same size
        for i in xrange(num_bands, MEAN_DIR_NUMBER_BANDS):
            psd.append(np.nan)
            mean_dir.append(np.nan)
            dir_spread.append(np.nan)

        # append and encode the particle mean directional arrays
        particle_parameters.extend(
            (self._encode_value(ArrayParticleKeys.PSD_MEAN_DIRECTIONAL, psd,
                                float_list_encoder),
             self._encode_value(ArrayParticleKeys.MEAN_DIRECTION_ARRAY,
                                mean_dir, float_list_encoder),
             self._encode_value(ArrayParticleKeys.DIRECTIONAL_SPREAD_ARRAY,
                                dir_spread, float_list_encoder)))

        return particle_parameters
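
The interleaved spectra are pulled apart with extended slices (every third element starting at offsets 0, 1 and 2) and then NaN-padded up to a fixed band count. A self-contained sketch of just that step; MEAN_DIR_NUMBER_BANDS here is an assumed small value, not the parser's real constant:

    import numpy as np

    MEAN_DIR_NUMBER_BANDS = 5  # assumed pad target for the sketch
    num_bands = 3
    # fabricated interleaved payload: (psd, mean_dir, dir_spread) triples per band
    spectra_payload = ['0.1', '10.0', '1.0',
                       '0.2', '20.0', '2.0',
                       '0.3', '30.0', '3.0']

    psd = spectra_payload[0:num_bands * 3:3]         # ['0.1', '0.2', '0.3']
    mean_dir = spectra_payload[1:num_bands * 3:3]    # ['10.0', '20.0', '30.0']
    dir_spread = spectra_payload[2:num_bands * 3:3]  # ['1.0', '2.0', '3.0']

    # pad to a fixed length so the arrays line up with the non-directional particle
    for _ in range(num_bands, MEAN_DIR_NUMBER_BANDS):
        psd.append(np.nan)
        mean_dir.append(np.nan)
        dir_spread.append(np.nan)

    print(psd)  # ['0.1', '0.2', '0.3', nan, nan]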
Example #11
    def process_velocity_data(self):
        """
        Handles the processing of velocity data particles and handles error processing if events
        which should have occurred prior to receiving a velocity record did not happen.
        """
        # Get the timestamp of the velocity record in case we need it for the metadata particle.
        timestamp = VelptAbDclDataParticle.get_timestamp(self._current_record)

        # If this flag is still indicating TRUE, it means we found NO diagnostic records.
        # That's an error!
        if self._first_diagnostics_record:
            self._first_diagnostics_record = False
            log.warning('No diagnostic records present, just a header. '
                        'No particles generated')
            self._exception_callback(
                RecoverableSampleException(
                    'No diagnostic records present, just a header. '
                    'No particles generated'))

        # This flag indicates that diagnostics were being produced and now that
        # the first velocity record has been encountered, it's time to match the
        # number of diagnostics particles produced against the number of diagnostic
        # records expected from the diagnostics header.
        if self._sending_diagnostics:
            self._sending_diagnostics = False
            if self._total_diagnostic_records != self._diagnostics_count:
                if self._diagnostics_count < self._total_diagnostic_records:
                    log.warning(
                        'Not enough diagnostics records, got %s, expected %s',
                        self._diagnostics_count,
                        self._total_diagnostic_records)
                    self._exception_callback(
                        RecoverableSampleException(
                            'Not enough diagnostics records'))

                elif self._diagnostics_count > self._total_diagnostic_records:
                    log.warning(
                        'Too many diagnostics records, got %s, expected %s',
                        self._diagnostics_count,
                        self._total_diagnostic_records)
                    self._exception_callback(
                        RecoverableSampleException(
                            'Too many diagnostics records'))
                    self._diagnostics_count = 0
                    self._total_diagnostic_records = 0

        velocity_data_dict = VelptAbDclDataParticle.generate_data_dict(
            self._current_record)

        particle = self._extract_sample(self._velocity_data_class,
                                        None,
                                        velocity_data_dict,
                                        internal_timestamp=timestamp)

        self._record_buffer.append(particle)
Example #12
    def build_instrument_metadata_particle(self, timestamp):
        """
        The instrument metadata particle is built from three separate records:
        the hardware configuration record, the head configuration record and the
        user configuration record. These should all be concentrated at the very
        beginning of the recovered data file. This assumption is made because the
        instrument is configured before being deployed so the records holding
        this data would be stored first. The data files seen as of the date this
        code was written all start with the three config records, then a quantity
        of velocity data records, then a group of diagnostics (header plus data
        records) followed by more velocity data records. This sequence can be
        repeated a number of times in one file, though the config data will only
        occur once. It is remotely possible that a group of diagnostics could
        occur before any velocity data records are encountered. Publishing of
        the instrument metadata particle is triggered by encountering either the
        first velocity data record or the first diagnostics data record.
        Counting to see if all three configuration records were encountered and
        then producing the instrument metadata particle was rejected as it is
        remotely possible that one of the configuration records could be missing.
        """
        self._config_metadata_built = True

        date_time_group = VelptAbDataParticle.get_date_time_string(self._current_record)

        instrument_metadata_dict = VelptAbDataParticle.generate_instrument_metadata_dict(
            date_time_group, self._hardware_config_dict, self._head_config_dict,
            self._user_config_dict)

        particle = self._extract_sample(self._instrument_metadata_class,
                                        None,
                                        instrument_metadata_dict,
                                        timestamp)

        self._record_buffer.append(particle)

        # Check to see if all the configuration records were found
        if not self._hardware_config_dict_generated:
            # Log a warning for the missing hardware config record
            log.warning(
                'Hardware configuration record invalid or not present in recovered data')
            self._exception_callback(RecoverableSampleException(
                'Hardware configuration record invalid or not present in recovered data'))

        if not self._head_config_dict_generated:
            # Log a warning for the missing head config record
            log.warning('Head configuration record invalid or not present in recovered data')
            self._exception_callback(RecoverableSampleException(
                'Head configuration record invalid or not present in recovered data'))

        if not self._user_config_dict_generated:
            # Log a warning for the missing user config record
            log.warning('User configuration record invalid or not present in recovered data')
            self._exception_callback(RecoverableSampleException(
                'User configuration record invalid or not present in recovered data'))
Example #13
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """
        
        for line in self._stream_handle:
            # check for a match against the corrected Endurance pattern
            match = ENDURANCE_CORR_MATCHER.match(line)

            if match is not None:
                log.debug('record found')
                data_particle = self._extract_sample(self._particle_class,
                                                     None,
                                                     match,
                                                     None)
                self._record_buffer.append(data_particle)

            else:
                # check to see if this is any other expected format
                test_uncorr = UNCORR_MATCHER.match(line)
                test_pioneer = PIONEER_MATCHER.match(line)
                test_meta = METADATA_MATCHER.match(line)

                if test_uncorr is None and test_meta is None and test_pioneer is None:
                    # something in the data didn't match a required regex, so report a
                    # recoverable exception via the callback and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(RecoverableSampleException(message))
Example #14
    def _build_parsed_values(self):
        """
        Take something in the data format and turn it into
        an array of dictionaries defining the data in the particle
        with the appropriate tag.
        @throws RecoverableSampleException If there is a problem with sample creation
        """

        results = []

        try:

            # Append the base metadata parsed values to the results to return
            results += self._build_metadata_parsed_values()

            data_match = self.raw_data[MetadataRawDataKey.DATA_MATCH]

            # Set the internal timestamp
            internal_timestamp_unix = numpy.float(data_match.group(
                DataMatchesGroupNumber.PROFILER_TIMESTAMP))
            self.set_internal_timestamp(unix_time=internal_timestamp_unix)

        except (ValueError, TypeError, IndexError) as ex:
            log.warn("Exception when building parsed values")
            raise RecoverableSampleException("Error (%s) while decoding parameters in data: [%s]"
                                             % (ex, self.raw_data))

        return results
Example #15
    def _build_parsed_values(self):
        """
        Take something in the data format and turn it into
        an array of dictionaries defining the data in the particle
        with the appropriate tag.
        @throws RecoverableSampleException If there is a problem with sample creation
        """
        results = []

        try:

            # Process each of the instrument particle parameters
            for name, group, function in ENGINEERING_PARTICLE_ENCODING_RULES:
                results.append(self._encode_value(name, self.raw_data.group(group), function))

            # Set the internal timestamp
            internal_timestamp_unix = numpy.float(self.raw_data.group(
                DataMatchesGroupNumber.PROFILER_TIMESTAMP))
            self.set_internal_timestamp(unix_time=internal_timestamp_unix)

        # We shouldn't end up with an exception due to the strongly specified regex, but we
        # will ensure we catch any potential errors just in case
        except (ValueError, TypeError, IndexError) as ex:
            log.warn("Exception when building parsed values")
            raise RecoverableSampleException("Error (%s) while decoding parameters in data: [%s]"
                                             % (ex, self.raw_data))

        return results
Example #16
    def __init__(self,
                 raw_data,
                 port_timestamp=None,
                 internal_timestamp=None,
                 preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
                 quality_flag=DataParticleValue.OK,
                 new_sequence=None):

        super(AdcpsJlnSioDataParticle,
              self).__init__(raw_data, port_timestamp, internal_timestamp,
                             preferred_timestamp, quality_flag, new_sequence)

        self._data_match = DATA_MATCHER.match(self.raw_data[8:])

        if not self._data_match:

            raise RecoverableSampleException(
                "AdcpsJlnSioParserDataParticle: No regex match of "
                "parsed sample data [%s]" % self.raw_data[8:])

        date_str = self.unpack_date(self._data_match.group(0)[11:19])

        unix_time = utilities.zulu_timestamp_to_utc_time(date_str)

        self.set_internal_timestamp(unix_time=unix_time)
Example #17
    def log_warning(self, msg_text, which_line):
        """
        Report a recoverable problem via the exception callback.
        :param msg_text: The text to display in the log
        :param which_line: The line number where the problem occurred
        """
        self._exception_callback(
            RecoverableSampleException('%s %d - No particle generated' % (msg_text, which_line)))
Example #18
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:
            # first check for a match against the uncorrected pattern
            match = DATA_MATCHER.match(line)

            if match is not None:
                log.debug('record found')

                data_particle = self._extract_sample(self._particle_class,
                                                     None, match, None)
                self._record_buffer.append(data_particle)

            else:
                test_status = STATUS_MATCHER.match(line)
                # just ignore the status messages

                if test_status is None:
                    test_empty = EMPTY_MATCHER.match(line)
                    # empty lines exist in all sample files, suppress warning due to empty line
                    if test_empty is None:
                        # something in the data didn't match a required regex, so report a
                        # recoverable exception via the callback and press on
                        message = "Error while decoding parameters in data: [%s]" % line
                        self._exception_callback(
                            RecoverableSampleException(message))
Example #19
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:

            match = BEGIN_MATCHER.match(line)

            if match is not None:

                count = match.group(DataMatchesGroupNumber.NUM_WAVELENGTHS)

                data_regex = self._build_data_regex(BEGIN_REGEX, count)

                fields = re.match(data_regex, line)

                if fields is not None:
                    self._process_data_match(fields, self._record_buffer)
                else:  # did not match the regex
                    log.warn("line did not match regex %s", line)
                    self._exception_callback(
                        RecoverableSampleException(
                            "Found an invalid line: %s" % line))

            else:
                # Check for head part match
                header_part_match = HEADER_PART_MATCHER.match(line)

                if header_part_match is not None:
                    self._process_header_part_match(header_part_match)
                else:
                    self._process_line_not_containing_data_record_or_header_part(
                        line)
Example #20
    def _build_parsed_values(self):
        """
        Take something in the data format and turn it into
        an array of dictionaries defining the data in the particle
        with the appropriate tag.
        @throws RecoverableSampleException If there is a problem with sample creation
        """

        try:

            # Generate a particle by calling encode_value for each entry
            # in the Instrument Particle Mapping table,
            # where each entry is a tuple containing the particle field name
            # and a function to use for data conversion.

            return [
                self._encode_value(name, self.raw_data[name], function)
                for name, function in WINCH_CSPP_PARTICLE_ENCODING_RULES
            ]

        except (ValueError, TypeError, IndexError) as ex:
            log.warn("Exception when building parsed values")
            raise RecoverableSampleException(
                "Error (%s) while encoding parameters in data: [%s]" %
                (ex, self.raw_data))
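
The encoding-rules table drives one _encode_value call per particle field. A simplified, self-contained sketch of that idea; encode_value here is a hypothetical stand-in for the framework helper, and the rules table and raw data are fabricated:

    # hypothetical stand-in for the framework's _encode_value helper
    def encode_value(name, value, encoding_function):
        return {'value_id': name, 'value': encoding_function(value)}

    WINCH_LIKE_ENCODING_RULES = [  # fabricated rules table
        ('winch_speed', float),
        ('winch_payout', int),
    ]

    raw_data = {'winch_speed': '1.5', 'winch_payout': '42'}

    parsed = [encode_value(name, raw_data[name], function)
              for name, function in WINCH_LIKE_ENCODING_RULES]
    print(parsed)  # [{'value_id': 'winch_speed', 'value': 1.5}, {'value_id': 'winch_payout', 'value': 42}]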
Example #21
    def _process_chunk_not_containing_data_record_or_header_part(self, chunk):
        """
        This method processes a chunk that does not contain a data record or header.  This case is
        not applicable to "non_data".  For cspp file streams, we expect some lines in the file that
        we do not care about, and we will not consider them "non_data".
        @param chunk A regular expression match object for a cspp header row
        """

        # Check for the expected timestamp line we will ignore
        timestamp_line_match = TIMESTAMP_LINE_MATCHER.match(chunk)
        # Check for other status messages we can ignore
        ignore_match = IGNORE_MATCHER.match(chunk)

        if timestamp_line_match is not None or ignore_match is not None:
            # Ignore
            pass

        else:

            # OK.  We got unexpected data
            log.warn('got unrecognized row %s at position %s', chunk,
                     self._read_state[StateKey.POSITION])
            self._exception_callback(
                RecoverableSampleException("Found an invalid chunk: %s" %
                                           chunk))
Example #22
    def _build_parsed_values(self):
        """
        Take something in the data format and turn it into
        an array of dictionaries defining the data in the particle
        with the appropriate tag.
        @throws RecoverableSampleException If there is a problem with sample creation
        """
        results = []

        # split the entire matched line by tabs, which returns each parameter's
        # value as a string in a list
        params = self.raw_data.group(0).split('\t')
        if len(params) < NUM_FIELDS:
            log.warn('Not enough fields could be parsed from the data %s',
                     self.raw_data.group(0))
            raise RecoverableSampleException('Not enough fields could be parsed from the data %s' %
                                             self.raw_data.group(0))

        for name, index, encode_function in self._parameter_map:
            if name == self._spectral_channels:
                # spectral channels is an array of ints, need to do the extra map
                results.append(self._encode_value(name,
                                                  map(int, params[index:GRP_SPECTRAL_END]),
                                                  encode_function))
            else:
                results.append(self._encode_value(name, params[index], encode_function))

        internal_timestamp_unix = float(params[GRP_PROFILER_TIMESTAMP])
        self.set_internal_timestamp(unix_time=internal_timestamp_unix)

        return results
Example #23
    def parse_file(self):
        """
        Parse Winch CSPP text file.
        """

        # loop over all lines in the data file and parse the data to generate Winch CSPP particles
        for line in self._stream_handle:

            match = WINCH_DATA_MATCHER.match(line)
            if not match:
                # If it is not a valid Winch Cspp record, ignore it.
                error_message = 'Winch Cspp data regex does not match for line: %s' % line
                log.warn(error_message)
                self._exception_callback(
                    RecoverableSampleException(error_message))
            else:

                date = match.group(WinchCsppParserDataParticleKey.DATE)
                year, month, day = date.split('-')
                hour, minute, second = match.group(
                    WinchCsppParserDataParticleKey.TIME).split(':')

                unix_time = calendar.timegm(
                    (int(year), int(month), int(day), int(hour), int(minute),
                     float(second)))
                time_stamp = ntplib.system_to_ntp_time(unix_time)

                # Generate a Winch CSPP particle using the group dictionary and add it to the internal buffer
                particle = self._extract_sample(WinchCsppDataParticle, None,
                                                match.groupdict(), time_stamp)
                if particle is not None:
                    self._record_buffer.append(particle)
Example #24
    def _build_parsed_values(self):
        """
        Take something in the data format and turn it into
        an array of dictionaries defining the data in the particle
        with the appropriate tag.
        @throws SampleException If there is a problem with sample creation
        """

        results = []

        try:

            # Process each of the instrument particle parameters
            for rule in INSTRUMENT_PARTICLE_ENCODING_RULES:

                results.append(
                    self._encode_value(
                        rule[PARTICLE_KEY_INDEX],
                        self.raw_data.group(
                            rule[DATA_MATCHES_GROUP_NUMBER_INDEX]),
                        rule[TYPE_ENCODING_INDEX]))

            # Set the internal timestamp
            internal_timestamp_unix = numpy.float(
                self.raw_data.group(DataMatchesGroupNumber.PROFILER_TIMESTAMP))
            self.set_internal_timestamp(unix_time=internal_timestamp_unix)

        except (ValueError, TypeError, IndexError) as ex:
            log.warn("Exception when building parsed values")
            raise RecoverableSampleException(
                "Error (%s) while decoding parameters in data: %s" %
                (ex, self.raw_data))

        log.debug('FlortDjCsppInstrumentDataParticle: particle=%s', results)
        return results
Example #25
    def _generate_metadata_particle(self):
        """
        This function generates a metadata particle.
        """

        if self._metadata_matches_dict[MetadataMatchKey.FILE_TIME_MATCH] is None:
            message = "Unable to create metadata particle due to missing file time"
            log.warn(message)
            self._exception_callback(RecoverableSampleException(message))
        else:
            particle_data = dict()

            for key in self._metadata_matches_dict.keys():
                log.trace('key: %s, particle_data: %s', key, particle_data)

                if self._metadata_matches_dict[key]:
                    self._process_metadata_match_dict(key, particle_data)

            utc_time = formatted_timestamp_utc_time(
                particle_data[PhsenAbcdefImodemDataParticleKey.FILE_TIME],
                "%Y%m%d %H%M%S")
            ntp_timestamp = ntplib.system_to_ntp_time(utc_time)

            # Generate the metadata particle class and add the
            # result to the list of particles to be returned.
            particle = self._extract_sample(self._metadata_particle_class,
                                            None,
                                            particle_data,
                                            internal_timestamp=ntp_timestamp)
            if particle is not None:
                log.trace("Appending metadata particle to record buffer: %s",
                          particle.generate())
                self._record_buffer.append(particle)
Example #26
    def _build_parsed_values(self):
        """
        Take something in the data format and turn it into
        an array of dictionaries defining the data in the particle
        with the appropriate tag.
        @throws RecoverableSampleException If there is a problem with sample creation
        """
        results = []

        try:
            results.append(
                self._encode_value(
                    CtdpfJCsppParserDataParticleKey.PROFILER_TIMESTAMP,
                    self.raw_data.group(
                        DataMatchesGroupNumber.PROFILER_TIMESTAMP),
                    numpy.float))

            results.append(
                self._encode_value(
                    CtdpfJCsppParserDataParticleKey.SUSPECT_TIMESTAMP,
                    self.raw_data.group(
                        DataMatchesGroupNumber.SUSPECT_TIMESTAMP),
                    encode_y_or_n))

            results.append(
                self._encode_value(
                    CtdpfJCsppParserDataParticleKey.TEMPERATURE,
                    self.raw_data.group(DataMatchesGroupNumber.TEMPERATURE),
                    float))

            results.append(
                self._encode_value(
                    CtdpfJCsppParserDataParticleKey.CONDUCTIVITY,
                    self.raw_data.group(DataMatchesGroupNumber.CONDUCTIVITY),
                    float))

            results.append(
                self._encode_value(
                    CtdpfJCsppParserDataParticleKey.PRESSURE,
                    self.raw_data.group(DataMatchesGroupNumber.PRESSURE),
                    float))

            results.append(
                self._encode_value(
                    CtdpfJCsppParserDataParticleKey.SALINITY,
                    self.raw_data.group(DataMatchesGroupNumber.SALINITY),
                    float))

            # Set the internal timestamp
            internal_timestamp_unix = numpy.float(
                self.raw_data.group(DataMatchesGroupNumber.PROFILER_TIMESTAMP))
            self.set_internal_timestamp(unix_time=internal_timestamp_unix)

        except (ValueError, TypeError, IndexError) as ex:
            log.warn("Exception when building parsed values")
            raise RecoverableSampleException(
                "Error (%s) while decoding parameters in data: [%s]" %
                (ex, self.raw_data))

        return results
Example #27
    def _validate_checksum(self, input_buffer):

        # the number of bytes in the record immediately follows the sentinel bytes
        # and does not include the 2 checksum bytes
        num_bytes = struct.unpack("<H", input_buffer[2:4])[0]

        record_start = 0
        record_end = num_bytes

        # if there is enough data in the buffer, check the record
        if record_end <= len(input_buffer[0:-CHECKSUM_BYTES]):
            # make sure the checksum bytes are in the buffer too

            # add up all the bytes in the record
            total = 0
            for i in range(record_start, record_end):
                total += ord(input_buffer[i])

            checksum = total & CHECKSUM_MODULO  # bitwise AND with 65535, i.e. modulo 65536

            #log.debug("checksum & total = %d %d ", checksum, total)

            if checksum == struct.unpack(
                    "<H",
                    input_buffer[record_end:record_end + CHECKSUM_BYTES])[0]:
                return True
            else:
                err_msg = 'ADCPT ACFGM DCL RECORD FAILED CHECKSUM'
                self._exception_callback(RecoverableSampleException(err_msg))
                log.warn(err_msg)
                return False
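
The checksum here is a 16-bit additive checksum: sum every byte of the record, mask with 0xFFFF, and compare against the little-endian value stored immediately after the record. A standalone sketch with a fabricated record:

    import struct

    def checksum_ok(record, stored_checksum):
        """Sketch of the 16-bit additive checksum check used above."""
        total = sum(bytearray(record))  # sum of all record bytes
        return (total & 0xFFFF) == struct.unpack('<H', stored_checksum)[0]

    record = b'\x7f\x7f\x08\x00 fabricated ensemble body'
    stored = struct.pack('<H', sum(bytearray(record)) & 0xFFFF)
    print(checksum_ok(record, stored))  # True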
Example #28
    def _build_parsed_values(self):
        """
        Take something in the data format and turn it into
        a particle with the appropriate tag.
        @throws SampleException If there is a problem with sample creation
        """
        match = HEADER_FOOTER_MATCHER.search(self.raw_data)
        if not match:
            raise RecoverableSampleException(
                "AdcpsJlnStcMetadataParserDataParticle: No regex match of "
                "parsed sample data [%s]" % self.raw_data)

        result = [self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_TIMESTAMP,
                                     match.group(1), str),
                  self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_ID,
                                     match.group(2), int),
                  self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_SERIAL_NUMBER,
                                     match.group(3), int),
                  self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_VOLTS,
                                     match.group(4), float),
                  self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_RECORDS,
                                     match.group(5), int),
                  self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_LENGTH,
                                     match.group(6), int),
                  self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_EVENTS,
                                     match.group(7), int),
                  self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_SAMPLES_WRITTEN,
                                     match.group(8), int),
                  ]
        return result
Example #29
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:

            data_match = NEW_DATA_MATCHER.match(line)
            if data_match:
                # DCL controller timestamp  is the port_timestamp
                port_timestamp = dcl_time_to_ntp(
                    data_match.groups()[DCL_TIMESTAMP])

                # particle-ize the data block received, return the record
                data_particle = self._extract_sample(
                    self._particle_class,
                    None,
                    data_match,
                    port_timestamp=port_timestamp,
                    preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                # add the particle to the record buffer
                self._record_buffer.append(data_particle)

            else:
                # NOTE: Need to check for the metadata line last, since the corrected Endurance
                # record also has the [*] pattern
                test_meta = METADATA_MATCHER.match(line)

                if test_meta is None:
                    # something in the data didn't match a required regex, so report a
                    # recoverable exception via the callback and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(
                        RecoverableSampleException(message))
Example #30
    def recov_exception(self, error_message):
        """
        Add a warning log message and use the exception callback to pass a recoverable exception
        @param error_message: The error message to use in the log and callback
        """
        log.warn(error_message)
        self._exception_callback(RecoverableSampleException(error_message))