Example #1
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:

            data_match = NEW_DATA_MATCHER.match(line)
            if data_match:
                # DCL controller timestamp is the port_timestamp
                port_timestamp = dcl_time_to_ntp(
                    data_match.groups()[DCL_TIMESTAMP])

                # particle-ize the data block received, return the record
                data_particle = self._extract_sample(
                    self._particle_class,
                    None,
                    data_match,
                    port_timestamp=port_timestamp,
                    preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                # append the particle to the record buffer, even if extraction returned None
                self._record_buffer.append(data_particle)

            else:
                # NOTE: Need to check for the metadata line last, since the corrected Endurance
                # record also has the [*] pattern
                test_meta = METADATA_MATCHER.match(line)

                if test_meta is None:
                    # the line didn't match any required pattern, so report a recoverable exception and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(
                        RecoverableSampleException(message))
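All of these examples lean on a shared helper, dcl_time_to_ntp, to turn the DCL controller timestamp into an NTP time. The helper itself is not shown; the sketch below is a plausible re-implementation, assuming the usual DCL timestamp layout 'YYYY/MM/DD HH:MM:SS.sss' in UTC.

    # Illustrative sketch only -- the real dcl_time_to_ntp lives in the
    # shared parser utilities; this assumes 'YYYY/MM/DD HH:MM:SS.sss' (UTC).
    import calendar
    from datetime import datetime

    import ntplib

    def dcl_time_to_ntp(dcl_timestamp):
        dt = datetime.strptime(dcl_timestamp, '%Y/%m/%d %H:%M:%S.%f')
        unix_time = calendar.timegm(dt.timetuple()) + dt.microsecond / 1e6
        return ntplib.system_to_ntp_time(unix_time)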
Example #2
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:

            data_match = NEW_DATA_MATCHER.match(line)
            if data_match:
                # DCL controller timestamp is the port_timestamp
                port_timestamp = dcl_time_to_ntp(data_match.groups()[DCL_TIMESTAMP])

                # particle-ize the data block received, return the record
                data_particle = self._extract_sample(self._particle_class,
                                                     None,
                                                     data_match,
                                                     port_timestamp=port_timestamp,
                                                     preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                # append the particle to the record buffer, even if extraction returned None
                self._record_buffer.append(data_particle)

            else:
                # NOTE: Need to check for the metadata line last, since the corrected Endurance
                # record also has the [*] pattern
                test_meta = METADATA_MATCHER.match(line)

                if test_meta is None:
                    # the line didn't match any required pattern, so report a recoverable exception and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(RecoverableSampleException(message))
Example #3
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:
            # check for a match against the sensor data pattern
            match = SENSOR_DATA_MATCHER.match(line)

            if match is not None:
                log.debug('record found')

                # DCL controller timestamp is the port_timestamp
                port_timestamp = dcl_time_to_ntp(
                    match.groups()[SENSOR_GROUP_TIMESTAMP])

                data_particle = self._extract_sample(
                    self._particle_class,
                    None,
                    match.groups(),
                    port_timestamp=port_timestamp,
                    preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                self._record_buffer.append(data_particle)

            else:
                # check to see if this is any other expected format
                test_meta = METADATA_MATCHER.match(line)

                if test_meta is None or line.find(TAB) != -1:
                    # the line didn't match any expected pattern, so report an exception and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(UnexpectedDataException(message))
Example #4
    def parse_file(self):
        """
        The main parsing function which loops over each line in the file and extracts particles if the correct
        format is found.
        """
        # read the first line in the file
        line = self._stream_handle.readline()

        while line:
            # check for a data line or a dcl logger line we specifically ignore
            data_match = DATA_LINE_MATCHER.match(line)
            ignore_match = IGNORE_LINE_MATCHER.match(line)

            if data_match:
                # found a data line, extract this particle
                # DCL controller timestamp is the port_timestamp
                dcl_controller_timestamp = data_match.groups()[DCL_TIMESTAMP_GROUP]
                port_timestamp = dcl_time_to_ntp(dcl_controller_timestamp)

                particle = self._extract_sample(self.particle_class,
                                                None,
                                                data_match,
                                                port_timestamp=port_timestamp,
                                                preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                self._record_buffer.append(particle)

            elif not ignore_match:
                # we found a line with an unknown format, report it via the exception callback
                error_message = 'Found line with unknown format %s' % line
                log.warn(error_message)
                self._exception_callback(SampleException(error_message))

            # read the next line
            line = self._stream_handle.readline()
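DATA_LINE_MATCHER and IGNORE_LINE_MATCHER are defined elsewhere in the module. To make the control flow above concrete, here are hypothetical shapes for the two patterns; the real expressions are instrument-specific.

    import re

    # Hypothetical pattern shapes, for illustration only.
    DCL_TIMESTAMP_REGEX = r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{3})'
    # instrument records: a DCL timestamp followed by numeric fields
    DATA_LINE_MATCHER = re.compile(DCL_TIMESTAMP_REGEX + r' (\d[\d.,]*)')
    # DCL logger status lines, e.g. '... [adcpt:DLOGP4]: Instrument Started'
    IGNORE_LINE_MATCHER = re.compile(DCL_TIMESTAMP_REGEX + r' \[\w+:DLOGP\d+\]:.*')
    # index of the timestamp in data_match.groups()
    DCL_TIMESTAMP_GROUP = 0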
Example #5
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:
            # check for a match against the sensor data pattern
            match = SENSOR_DATA_MATCHER.match(line)

            if match is not None:
                log.debug('record found')

                # DCL controller timestamp is the port_timestamp
                port_timestamp = dcl_time_to_ntp(match.groups()[SENSOR_GROUP_TIMESTAMP])

                data_particle = self._extract_sample(self._particle_class,
                                                     None,
                                                     match.groups(),
                                                     port_timestamp=port_timestamp,
                                                     preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                self._record_buffer.append(data_particle)

            else:
                # check to see if this is any other expected format
                test_meta = METADATA_MATCHER.match(line)

                if test_meta is None or line.find(TAB) != -1:
                    # the line didn't match any expected pattern, so report an exception and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(UnexpectedDataException(message))
Example #6
    def parse_file(self):
        """
        Parse the zplsc_c log file (averaged condensed data).
        Read the file line by line. Values are extracted from lines containing
        condensed ASCII data and appended to the record buffer as particles.
        """

        # Loop over all lines in the data file and parse the data to generate particles
        for number, line in enumerate(self._stream_handle, start=1):

            # Check if this is the dcl status log
            match = DCL_LOG_MATCHER.match(line)
            if match is not None:
                log.trace("MATCHED DCL_LOG_MATCHER: %s: %s", number, match.groups())
                # No data to extract, move on to the next line
                continue

            # Check if this is the instrument phase status log
            match = PHASE_STATUS_MATCHER.match(line)
            if match is not None:
                log.trace("MATCHED PHASE_STATUS_MATCHER: %s: %s", number, match.groups())
                # No data to extract, move on to the next line
                continue

            # Check if this is the instrument condensed ASCII data
            match = SENSOR_DATA_MATCHER.match(line)
            if match is not None:
                log.trace("MATCHED SENSOR_DATA_MATCHER: %s: %s", number, match.groups())

                # Extract the condensed ASCII data from this line
                data_dict = self.parse_line(match)
                if data_dict is None:
                    log.error('Erroneous data found in line %s: %s', number, line)
                    continue

                dcl_timestamp = data_dict[ZplscCDataKey.DCL_TIMESTAMP]
                # dcl_timestamp is the port_timestamp
                port_timestamp = dcl_time_to_ntp(dcl_timestamp)

                transmission_timestamp = data_dict[ZplscCParticleKey.TRANS_TIMESTAMP]
                # transmission_timestamp is the internal_timestamp
                internal_timestamp = timestamp_yyyymmddhhmmss_to_ntp(transmission_timestamp)

                # Extract a particle and append it to the record buffer.
                particle = self._extract_sample(ZplscCInstrumentDataParticle,
                                                None,
                                                data_dict,
                                                internal_timestamp=internal_timestamp,
                                                port_timestamp=port_timestamp,
                                                preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                if particle is not None:
                    log.trace('Parsed particle: %s' % particle.generate_dict())
                    self._record_buffer.append(particle)

                continue

            # Error, line did not match any expected regex
            self._exception_callback(
                RecoverableSampleException('Unknown data found in line %s:%s' % (number, line)))
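Besides dcl_time_to_ntp, the zplsc_c example converts the instrument's transmission time, which arrives as a compact 'YYYYMMDDHHMMSS' string. A sketch of that helper, under the same caveat that the real one lives in the shared utilities:

    # Illustrative sketch of timestamp_yyyymmddhhmmss_to_ntp, assuming a
    # UTC 'YYYYMMDDHHMMSS' transmission timestamp.
    import calendar
    from datetime import datetime

    import ntplib

    def timestamp_yyyymmddhhmmss_to_ntp(timestamp_str):
        dt = datetime.strptime(timestamp_str, '%Y%m%d%H%M%S')
        return ntplib.system_to_ntp_time(calendar.timegm(dt.timetuple()))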
Example #7
    def __init__(self, raw_data, instrument_particle_map, *args, **kwargs):

        super(DclInstrumentDataParticle, self).__init__(raw_data, *args, **kwargs)

        # DCL Controller timestamp is the port_timestamp
        port_timestamp = dcl_time_to_ntp(self.raw_data[SENSOR_GROUP_TIMESTAMP])
        self.set_port_timestamp(port_timestamp)

        self.instrument_particle_map = instrument_particle_map
Example #8
    @staticmethod
    def _generate_port_timestamp(record_dict):
        """
        Generates the port_timestamp from the given DCL Controller Timestamp.
        :param record_dict: dictionary containing the dcl controller timestamp str parameter
        :return: the port_timestamp
        """

        return float(
            dcl_time_to_ntp(
                record_dict[Pco2wAbcDataParticleKey.DCL_CONTROLLER_TIMESTAMP]))
Example #9
    def __init__(self, raw_data, instrument_particle_map, *args, **kwargs):

        super(DclInstrumentDataParticle,
              self).__init__(raw_data, *args, **kwargs)

        # DCL Controller timestamp is the port_timestamp
        port_timestamp = dcl_time_to_ntp(self.raw_data[SENSOR_GROUP_TIMESTAMP])
        self.set_port_timestamp(port_timestamp)

        self.instrument_particle_map = instrument_particle_map
Example #10
    def _extract_dcl_controller_ntp_timestamp(self, inst_match):
        """
        Create the timestamp to be used as the port_timestamp when the
        instrument particle is generated.
        """

        # calculate the instrument particle port_timestamp
        # from the DCL timestamp.
        return dcl_time_to_ntp(
            inst_match.group(
                InstrumentDataMatchGroups.INST_GROUP_DCL_TIMESTAMP))
Example #11
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:
            # first check for a match against the uncorrected pattern
            match = UNCORR_MATCHER.match(line)
            if match is None:
                # check for a match against corrected Endurance pattern
                match = ENDURANCE_CORR_MATCHER.match(line)
            if match is None:
                # check for a match against Pioneer pattern
                match = PIONEER_MATCHER.match(line)
            if match is None:
                # check for a match against CTDBP_FLORT pattern
                match = CTDBP_FLORT_MATCHER.match(line)

            if match is not None:
                log.debug('record found')

                # DCL Controller timestamp is the port_timestamp
                port_timestamp = dcl_time_to_ntp(
                    match.group('dcl_controller_timestamp'))

                # Instrument timestamp is the internal_timestamp
                internal_timestamp = timestamp_ddmmyyyyhhmmss_to_ntp(
                    match.group('date_time_string'))

                data_particle = self._extract_sample(
                    self._particle_class,
                    None,
                    match,
                    port_timestamp=port_timestamp,
                    internal_timestamp=internal_timestamp,
                    preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                self._record_buffer.append(data_particle)

            else:
                # NOTE: Need to check for the metadata line last, since the corrected Endurance
                # record also has the [*] pattern
                test_meta = METADATA_MATCHER.match(line)

                if test_meta is None:
                    # the line didn't match any required pattern, so report a recoverable exception and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(
                        RecoverableSampleException(message))
Example #12
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        for line in self._stream_handle:
            # first check for a match against the uncorrected pattern
            match = UNCORR_MATCHER.match(line)
            if match is None:
                # check for a match against corrected Endurance pattern
                match = ENDURANCE_CORR_MATCHER.match(line)
            if match is None:
                # check for a match against Pioneer pattern
                match = PIONEER_MATCHER.match(line)
            if match is None:
                # check for a match against CTDBP_FLORT pattern
                match = CTDBP_FLORT_MATCHER.match(line)

            if match is not None:
                log.debug('record found')

                # DCL Controller timestamp is the port_timestamp
                port_timestamp = dcl_time_to_ntp(match.group('dcl_controller_timestamp'))

                # Instrument timestamp is the internal_timestamp
                internal_timestamp = timestamp_ddmmyyyyhhmmss_to_ntp(match.group('date_time_string'))

                data_particle = self._extract_sample(self._particle_class,
                                                     None,
                                                     match,
                                                     port_timestamp=port_timestamp,
                                                     internal_timestamp=internal_timestamp,
                                                     preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                self._record_buffer.append(data_particle)

            else:
                # NOTE: Need to check for the metadata line last, since the corrected Endurance
                # record also has the [*] pattern
                test_meta = METADATA_MATCHER.match(line)

                if test_meta is None:
                    # the line didn't match any required pattern, so report a recoverable exception and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(RecoverableSampleException(message))
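The four-way 'if match is None' ladder in the last two examples is a first-match-wins chain. If more record formats accumulate, the same logic reads more naturally as a loop over the matchers:

    # Equivalent first-match-wins chain, written as a loop (sketch):
    match = None
    for matcher in (UNCORR_MATCHER, ENDURANCE_CORR_MATCHER,
                    PIONEER_MATCHER, CTDBP_FLORT_MATCHER):
        match = matcher.match(line)
        if match is not None:
            break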
Example #13
    def __init__(self,
                 raw_data,
                 port_timestamp=None,
                 internal_timestamp=None,
                 preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
                 quality_flag=DataParticleValue.OK,
                 new_sequence=None):
        super(PresfAbcDclParserTideDataParticle,
              self).__init__(raw_data, port_timestamp, internal_timestamp,
                             preferred_timestamp, quality_flag, new_sequence)

        # DCL Controller timestamp is the port_timestamp
        dcl_controller_timestamp = dcl_time_to_ntp(
            self.raw_data.group(TIDE_GROUP_DCL_TIMESTAMP))
        self.set_port_timestamp(dcl_controller_timestamp)

        # Instrument timestamp is the internal_timestamp
        instrument_timestamp = timestamp_ddmmyyyyhhmmss_to_ntp(
            self.raw_data.group(TIDE_GROUP_DATA_TIME_STRING))
        self.set_internal_timestamp(instrument_timestamp)
Example #14
    def __init__(self, raw_data,
                 port_timestamp=None,
                 internal_timestamp=None,
                 preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
                 quality_flag=DataParticleValue.OK,
                 new_sequence=None):
        super(PresfAbcDclParserTideDataParticle, self).__init__(raw_data,
                                                                port_timestamp,
                                                                internal_timestamp,
                                                                preferred_timestamp,
                                                                quality_flag,
                                                                new_sequence)

        # DCL Controller timestamp is the port_timestamp
        dcl_controller_timestamp = dcl_time_to_ntp(self.raw_data.group(TIDE_GROUP_DCL_TIMESTAMP))
        self.set_port_timestamp(dcl_controller_timestamp)

        # Instrument timestamp is the internal_timestamp
        instrument_timestamp = timestamp_ddmmyyyyhhmmss_to_ntp(self.raw_data.group(TIDE_GROUP_DATA_TIME_STRING))
        self.set_internal_timestamp(instrument_timestamp)
Example #15
    def parse_file(self):
        """
        The main parsing function which loops over each line in the file and extracts particles if the correct
        format is found.
        """
        # read the first line in the file
        line = self._stream_handle.readline()

        while line:
            # check for a data line or a dcl logger line we specifically ignore
            data_match = DATA_LINE_MATCHER.match(line)
            ignore_match = IGNORE_LINE_MATCHER.match(line)

            if data_match:
                # found a data line, extract this particle
                # DCL controller timestamp is the port_timestamp
                dcl_controller_timestamp = data_match.groups()[DCL_TIMESTAMP_GROUP]
                port_timestamp = dcl_time_to_ntp(dcl_controller_timestamp)

                particle = self._extract_sample(
                    self.particle_class,
                    None,
                    data_match,
                    port_timestamp=port_timestamp,
                    preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                self._record_buffer.append(particle)

            elif not ignore_match:
                # we found a line with an unknown format, report it via the exception callback
                error_message = 'Found line with unknown format %s' % line
                log.warn(error_message)
                self._exception_callback(SampleException(error_message))

            # read the next line
            line = self._stream_handle.readline()
Example #16
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to the established patterns,
        generating particles for data lines
        """

        #  initialize data for modem particle
        first_timestamp = None
        date_timestamp = None
        distance = None
        dsp_bat = None
        xmit_bat = None

        #  initialize raw_data for CsppEngDclEngDataParticle
        self._eng_data = [None] * 10

        for line in self._stream_handle:

            data_match = RECORD_MATCHER.match(line)

            if data_match is None:
                message = 'got malformed line %s ' % line
                log.warn(message)
                self._exception_callback(RecoverableSampleException(message))
                continue
            if data_match.group('mode') == 'sent':
                continue  # skip sent messages, go to next line

            timestamp_str = data_match.group('timestamp')
            message = data_match.group('message')

            if first_timestamp is None:
                first_timestamp = timestamp_str  # save the first timestamp for the modem particle

            # save off header information for modem particle
            # modem particle created after processing entire file.
            range_match = RANGE_MATCHER.match(message)
            if range_match:
                distance = range_match.group('range')
                continue  # go to next line
            dsp_match = DSP_MATCHER.match(message)
            if dsp_match:
                dsp_bat = dsp_match.group('dsp_bat')
                continue  # go to next line
            xmit_match = XMIT_MATCHER.match(message)
            if xmit_match:
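                # note: as written, this requires the XMIT pattern to reuse the 'dsp_bat' group name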
                xmit_bat = xmit_match.group('dsp_bat')
                continue  # go to next line

            # process NMEA sentences
            nmea_match = NMEA_MATCHER.match(message)
            if nmea_match:
                sentence = nmea_match.group('sentence')
                checksum = int(nmea_match.group('checksum'), 16)  # Convert to integer

                # Note: NMEA checksums typically do not include the $ at the
                # beginning of the sentence but it appears Wetlabs implemented
                # it that way.
                comp_checksum = self.calc_checksum(sentence)

                if comp_checksum == checksum:
                    fields = sentence.split(',')
                    command = fields[5]
                    count = fields[6]

                    sentence_params = NMEA_SENTENCE_MAP.get(command)

                    if sentence_params is None:
                        # skip NMEA sentences we are not looking for
                        log.debug('NMEA sentence skipped %s', line)
                        continue  # go to next line

                    expected_count, particle_class = sentence_params
                    if int(count) != expected_count:
                        message = 'did not get expected number of fields on line %s' % line
                        log.warn(message)
                        self._exception_callback(RecoverableSampleException(message))
                        continue  # go to next line

                    if particle_class == CsppEngDclEngDataParticle:
                        if command == 'DATE':
                            date_timestamp = timestamp_str  # save timestamp from the DATE record
                            self.process_date(fields[7:])
                        elif command == 'PFS':
                            self._eng_data[1:3] = fields[7:9]
                        elif command == 'PST':
                            self.process_start(fields[7:])
                        elif command == 'ENA':
                            self._eng_data[5:7] = fields[7:9]
                        elif command == 'WHE':
                            self.process_wave(fields[7:])

                    else:
                        # Create particle and add to buffer
                        timestamp = dcl_time_to_ntp(timestamp_str)
                        data_particle = self._extract_sample(particle_class,
                                                             None,
                                                             fields[7:],
                                                             port_timestamp=timestamp,
                                                             preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                        self._record_buffer.append(data_particle)
                else:
                    message = 'checksum failed on line %s' % line
                    log.warn(message)
                    self._exception_callback(RecoverableSampleException(message))

        # end for loop

        # only send modem particle if we have a timestamp
        # and at least one parameter
        if first_timestamp and (distance or dsp_bat or xmit_bat):
            timestamp = dcl_time_to_ntp(first_timestamp)
            data_particle = self._extract_sample(CsppEngDclModemParticle,
                                                 None,
                                                 [distance, dsp_bat, xmit_bat],
                                                 port_timestamp=timestamp,
                                                 preferred_ts=DataParticleKey.PORT_TIMESTAMP)

            self._record_buffer.append(data_particle)

        if any(self._eng_data):  # Publish CsppEngDclEngDataParticle if we have any data
            if date_timestamp:  # preference is DATE timestamp
                timestamp = dcl_time_to_ntp(date_timestamp)
            else:
                timestamp = dcl_time_to_ntp(first_timestamp)

            data_particle = self._extract_sample(CsppEngDclEngDataParticle,
                                                 None,
                                                 self._eng_data,
                                                 port_timestamp=timestamp,
                                                 preferred_ts=DataParticleKey.PORT_TIMESTAMP)

            self._record_buffer.append(data_particle)
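calc_checksum is not shown above; the note about Wetlabs including the leading '$' suggests the standard NMEA convention of XOR-ing the sentence characters. A hedged sketch of that convention:

    def calc_checksum(sentence):
        # XOR of every character in the sentence (illustrative sketch; per
        # the note above, the '$' is treated as part of the sentence here)
        checksum = 0
        for character in sentence:
            checksum ^= ord(character)
        return checksum

The result would then be compared against int(nmea_match.group('checksum'), 16), as in the parser above.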
Example #17
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to
        the established patterns, generating particles for data lines
        """
        for line in self._stream_handle:
            message = 'data line \n%s' % line
            log.debug(message)

            # First check for valid FLORT DJ DCL data
            # If this is a valid sensor data record,
            # use the extracted fields to generate a particle.
            sensor_match = SENSOR_DATA_MATCHER.match(line)

            if sensor_match is not None:
                self._particle_class._data_particle_map = INSTRUMENT_PARTICLE_MAP
                log.debug('FLORT DJ match found')
            else:
                log.debug('FLORT DJ match NOT found')
                # check for a match against the FLORT D data in a combined
                # CTDBP FLORT instrument record
                sensor_match = CTDBP_FLORT_MATCHER.match(line)

                if sensor_match is not None:
                    self._particle_class._data_particle_map = CTDBP_FLORT_PARTICLE_MAP
                    log.debug('CTDBP/FLORT match found')

            if sensor_match is not None:
                # FLORT data matched against one of the patterns
                log.debug('record found')

                # DCL Controller timestamp is the port_timestamp
                dcl_controller_timestamp = sensor_match.groups()[SENSOR_GROUP_TIMESTAMP]
                port_timestamp = dcl_time_to_ntp(dcl_controller_timestamp)

                if self._particle_class._data_particle_map == INSTRUMENT_PARTICLE_MAP:
                    # For valid FLORT DJ data, Instrument timestamp is the internal_timestamp
                    instrument_timestamp = sensor_match.groups()[SENSOR_GROUP_SENSOR_DATE] \
                                           + ' ' + sensor_match.groups()[SENSOR_GROUP_SENSOR_TIME]
                    internal_timestamp = timestamp_mmddyyhhmmss_to_ntp(instrument_timestamp)
                else:
                    # _data_particle_map is CTDBP_FLORT_PARTICLE_MAP
                    utc_time = formatted_timestamp_utc_time(sensor_match.groups()[CTDBP_FLORT_GROUP_DATE_TIME],
                                                            "%d %b %Y %H:%M:%S")
                    instrument_timestamp = ntplib.system_to_ntp_time(utc_time)
                    internal_timestamp = instrument_timestamp

                # using port_timestamp as preferred_ts because internal_timestamp is not accurate
                particle = self._extract_sample(self._particle_class,
                                                None,
                                                sensor_match.groups(),
                                                port_timestamp=port_timestamp,
                                                internal_timestamp=internal_timestamp,
                                                preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                # append the particle to the record buffer, even if
                # extraction returned None
                self._record_buffer.append(particle)

            # It's not a sensor data record, see if it's a metadata record.
            else:
                log.debug('No data recs found, check for meta record')

                # If it's a valid metadata record, ignore it.
                # Otherwise generate warning for unknown data.

                meta_match = METADATA_MATCHER.match(line)
                if meta_match is None:
                    error_message = 'Unknown data found in line %s' % line
                    log.warn(error_message)
                    self._exception_callback(UnexpectedDataException(error_message))
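A note on the pattern in this example: the parser steers field mapping by assigning a class attribute on the particle class before extraction. A minimal, self-contained sketch of that dispatch, with hypothetical names and maps:

    # Hypothetical (name, group index, encoder) maps, illustration only.
    INSTRUMENT_PARTICLE_MAP = [('raw_signal_beta', 1, int)]
    CTDBP_FLORT_PARTICLE_MAP = [('raw_signal_beta_ctdbp', 1, int)]

    class SketchParticle(object):
        _data_particle_map = None  # set by the parser before extraction

        def __init__(self, raw_groups):
            self.raw_groups = raw_groups

        def build_parsed_values(self):
            # encode each raw group using whichever map is active
            return [{'value_id': name, 'value': encode(self.raw_groups[index])}
                    for name, index, encode in self._data_particle_map]

    SketchParticle._data_particle_map = INSTRUMENT_PARTICLE_MAP
    print(SketchParticle(('ts', '42')).build_parsed_values())
    # [{'value_id': 'raw_signal_beta', 'value': 42}]

Mutating a class attribute works because every particle built from a line sees the map chosen for that line, but it leaves the particle class stateful; passing the map into the constructor, as the DclInstrumentDataParticle examples above do, avoids that.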
Example #18
    def parse_file(self):
        """
        Parse out any pending data chunks in the chunker.
        If it is valid data, build a particle.
        Go until the chunker has no more valid data.
        @retval a list of tuples with sample particles encountered in this
            parsing, plus the state.
        """

        data = self._stream_handle.read()
        position = 0  # keep track of where we are in the file

        matches = SENSOR_DATA_MATCHER.finditer(data)

        for sensor_match in matches:

            start = sensor_match.start()

            #  check to see if we skipped over any data
            if start != position:
                skipped_data = data[position:start]
                meta_match = METADATA_MATCHER.match(skipped_data)
                if meta_match is not None and meta_match.group(0) == skipped_data:
                    pass  # ignore all metadata records
                else:
                    error_message = 'Unknown data found in line %s' % skipped_data
                    log.warn(error_message)
                    self._exception_callback(UnexpectedDataException(error_message))

            position = sensor_match.end()  # increment the position

            groups = sensor_match.groups()

            # See if the checksum is correct.
            # Checksum is the modulo 256 sum of all data bytes.
            # If calculated checksum is zero, the record checksum is valid.

            buffer_checksum = groups[SENSOR_GROUP_CHECKSUM_SECTION]
            checksum = sum(map(ord, buffer_checksum)) % 256

            if checksum == 0:
                checksum_status = CHECKSUM_PASSED
            else:
                checksum_status = CHECKSUM_FAILED

            # Create a tuple containing all the data to be used when
            # creating the particle.
            # The order of the particle data matches the PARTICLE_GROUPS.

            particle_data = (
                groups[SENSOR_GROUP_TIMESTAMP],
                groups[SENSOR_GROUP_YEAR],
                groups[SENSOR_GROUP_MONTH],
                groups[SENSOR_GROUP_DAY],
                groups[SENSOR_GROUP_HOUR],
                groups[SENSOR_GROUP_MINUTE],
                groups[SENSOR_GROUP_SECOND],
                groups[SENSOR_GROUP_ID],
                groups[SENSOR_GROUP_SERIAL],
                groups[SENSOR_GROUP_TIMER],
                struct.unpack('>h', groups[SENSOR_GROUP_DELAY])[0],
                list(struct.unpack('>7I', groups[SENSOR_GROUP_ADC_COUNTS])),
                struct.unpack('>H', groups[SENSOR_GROUP_SUPPLY_VOLTAGE])[0],
                struct.unpack('>H', groups[SENSOR_GROUP_ANALOG_VOLTAGE])[0],
                struct.unpack('>H', groups[SENSOR_GROUP_TEMPERATURE])[0],
                struct.unpack('>B', groups[SENSOR_GROUP_FRAME_COUNT])[0],
                checksum_status
            )

            # DCL Controller timestamp is the port_timestamp
            port_timestamp = dcl_time_to_ntp(groups[SENSOR_GROUP_TIMESTAMP])

            particle = self._extract_sample(self.particle_class,
                                            None,
                                            particle_data,
                                            port_timestamp=port_timestamp,
                                            preferred_ts=DataParticleKey.PORT_TIMESTAMP)

            self._record_buffer.append(particle)
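The checksum convention in this example is worth spelling out: the sender picks the record's checksum byte so that every byte, checksum included, sums to zero modulo 256, which is why a zero result means the record is valid. A synthetic check:

    # Synthetic demonstration of the mod-256 convention used above.
    payload = bytearray(b'\x10\x20\x30')
    checksum_byte = (-sum(payload)) % 256
    payload.append(checksum_byte)           # full record, checksum included
    assert sum(payload) % 256 == 0          # calculated checksum is zero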
Example #19
    def _process_instrument_data(self, working_record):
        """
        Determines which particle to produce, calls extract_sample to create the given particle
        """
        log.debug(
            "PhsenAbcdefDclParser._process_instrument_data(): working_record size %s, contents: %s",
            len(working_record), working_record)

        # this size includes the leading * character
        instrument_record_length = 465

        # this size includes the leading * character
        control_record_length_without_voltage_battery = 39

        # this size includes the leading * character
        control_record_length_with_voltage_battery = 43

        data_type = self._determine_data_type(working_record)

        # DCL controller timestamp is the port_timestamp
        port_timestamp = dcl_time_to_ntp(self.latest_dcl_time)

        if data_type is not DataTypeEnum.UNKNOWN:

            # Create a tuple for the particle composed of the working record and latest DCL time
            # The tuple allows for DCL time to be available when EXTERNAL calls each particle's
            # build_parse_values method
            particle_data = (self.latest_dcl_time, working_record)

            if data_type is DataTypeEnum.INSTRUMENT:

                # Per the IDD, if the candidate data is not the proper size, throw a recoverable exception
                if len(working_record) == instrument_record_length:

                    # Create particle mule (to be used later to create the instrument particle)
                    particle = self._extract_sample(
                        self._instrument_data_particle_class,
                        None,
                        particle_data,
                        port_timestamp=port_timestamp)

                    self._record_buffer.append(particle)
                else:
                    self._exception_callback(
                        RecoverableSampleException(
                            "PhsenAbcdefDclParser._process_instrument_data(): "
                            "Throwing RecoverableSampleException, Size of data "
                            "record is not the length of an instrument data record"
                        ))

            elif data_type is DataTypeEnum.CONTROL:

                # Per the IDD, if the candidate data is not the proper size, throw a recoverable exception
                if len(working_record) == control_record_length_without_voltage_battery or \
                   len(working_record) == control_record_length_with_voltage_battery:

                    # Create particle mule (to be used later to create the metadata particle)
                    particle = self._extract_sample(
                        self._metadata_particle_class,
                        None,
                        particle_data,
                        port_timestamp=port_timestamp)

                    self._record_buffer.append(particle)
                else:
                    log.warn(
                        "PhsenAbcdefDclParser._process_instrument_data(): "
                        "Size of data record is not the length of a control data record"
                    )

                    self._exception_callback(
                        RecoverableSampleException(
                            "PhsenAbcdefDclParser._process_instrument_data(): "
                            "Throwing RecoverableSampleException, Size of data "
                            "record is not the length of a control data record"
                        ))
        else:
            log.warn(
                "PhsenAbcdefDclParser._process_instrument_data(): "
                "Throwing RecoverableSampleException, Record is neither instrument nor control"
            )

            self._exception_callback(
                RecoverableSampleException(
                    "PhsenAbcdefDclParser._process_instrument_data(): "
                    "Data Type is neither Control nor Instrument"))
Example #20
    def parse_file(self):
        """
        Entry point into parsing the file, loop over each line and interpret it until the entire file is parsed
        """
        stored_start_timestamp = None

        # read the first line in the file
        line = self._stream_handle.readline()

        while line:
            # data will be at start of line so use match
            data_match = DATA_START_MATCHER.match(line)
            # log messages also begin at the start of the line so use match
            log_match = LOG_START_MATCHER.match(line)

            if data_match:
                # found a data line
                dcl_timestamp = data_match.group(1)

                # Note: Bug #10002 found that early deployments produced data missing commas
                # between some fields. Replace commas with spaces and then split, to
                # correctly parse files from deployments with either firmware

                fields_set = line[START_N_CHARS:].replace(',', ' ')
                fields = fields_set.split()

                if len(fields) != N_FIELDS:
                    msg = 'Expected %d fields but received %d' % (N_FIELDS,
                                                                  len(fields))
                    log.warn(msg)
                    self._exception_callback(SampleException(msg))
                else:
                    # create an array of the fields to parse in the particle
                    raw_data = [stored_start_timestamp, dcl_timestamp]
                    raw_data.extend(fields)

                    # DCL controller timestamp is the port_timestamp
                    port_timestamp = dcl_time_to_ntp(
                        raw_data[DCL_CONTROLLER_TIMESTAMP])

                    # data collection time is the internal_timestamp
                    unix_ts = float(raw_data[DATA_COLLECTION_TIME])
                    internal_timestamp = ntplib.system_to_ntp_time(unix_ts)

                    # extract this particle
                    particle = self._extract_sample(
                        self.particle_class,
                        None,
                        raw_data,
                        port_timestamp=port_timestamp,
                        internal_timestamp=internal_timestamp,
                        preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                    self._record_buffer.append(particle)
                    stored_start_timestamp = None

            elif log_match:
                # pull out whatever text is within the log
                log_contents = log_match.group(2)

                # there are two cases: a log message simply contains the 'Instrument Started' text, or it
                # contains an entire other log message which may itself contain 'Instrument Started'
                instr_log_match = INSTRUMENT_STARTED_MATCHER.match(
                    log_contents)
                full_log_instr_match = INSTRUMENT_START_LOG_MATCHER.match(
                    log_contents)

                # text other than instrument started is ignored within log messages
                if instr_log_match:
                    # found a line containing a single log instrument started, hold on to it until we get a data line
                    stored_start_timestamp = log_match.group(1)
                elif full_log_instr_match:
                    # found a log within a log, use the inner timestamp associated with the instrument start
                    stored_start_timestamp = full_log_instr_match.group(1)

            else:
                msg = 'Data with unexpected format received: %s' % line
                log.warn(msg)
                self._exception_callback(UnexpectedDataException(msg))

            line = self._stream_handle.readline()
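The internal timestamp above comes from a Unix epoch float, and ntplib's helper simply shifts it by the fixed 70-year offset between the NTP (1900) and Unix (1970) epochs. A quick sanity check:

    import ntplib

    # system_to_ntp_time adds the 2208988800-second NTP/Unix epoch offset
    assert ntplib.system_to_ntp_time(0.0) == 2208988800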
Example #21
    def parse_file(self):
        """
        Parser for fuelcell_eng_dcl data.
        """
        line_count = 0

        # Read a single line from the input file
        fuelcell_input_row = self._file_handle.readline()

        # Read the file, one line at a time
        while fuelcell_input_row:

            line_count += 1

            # Check to see if this record contains fuel cell data
            if not NON_DATA_MATCHER.search(fuelcell_input_row):

                # Is the record properly time stamped?
                found_date_time_group = DATE_MATCHER.search(fuelcell_input_row)

                # If so, continue processing
                if found_date_time_group:

                    # Grab the time stamp from the data
                    date_time_group = found_date_time_group.group(1)

                    # Now get the fuel cell data from the input line
                    found_data = START_DATA_MATCHER.search(fuelcell_input_row)

                    # If an integer was found, followed by a comma, the line has fuel cell data.
                    if found_data:

                        data_string = fuelcell_input_row[found_data.start(1) + 1:]

                        # Need to find the colon near the end of the line which marks the
                        # end of the actual fuel cell data. The colon marks the end of the
                        # fuel cell data followed by the checksum for that data. Following
                        # that there will be a space then a hexadecimal number. If any of those
                        # elements are missing, the data is suspect.
                        found_end = END_DATA_MATCHER.search(data_string)

                        if found_end:

                            # first find the last space in the data_string (start of the terminator)
                            terminator_index = data_string.rfind(' ')
                            the_data = data_string[:terminator_index]

                            # Now replace any extraneous spaces in the data
                            the_data = the_data.replace(' ', '')

                            data_plus_checksum = the_data.split(':')
                            actual_data = data_plus_checksum[0]
                            read_checksum = int(data_plus_checksum[1])

                            if self.good_checksum(actual_data, read_checksum):
                                the_fields = actual_data.split(',')

                                if self.good_field(the_fields):
                                    # DCL controller timestamp is the port_timestamp
                                    dcl_controller_timestamp = date_time_group
                                    port_timestamp = dcl_time_to_ntp(
                                        dcl_controller_timestamp)

                                    raw_data = [date_time_group]
                                    raw_data.extend(the_fields)

                                    particle = self._extract_sample(
                                        self._fuelcell_data_class,
                                        None,
                                        raw_data,
                                        port_timestamp=port_timestamp,
                                        preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                                    self._record_buffer.append(particle)

                                else:
                                    self.log_warning('Improper format line',
                                                     line_count)
                            else:
                                self.log_warning('Bad checksum line',
                                                 line_count)
                        else:
                            self.log_warning('No terminator found on line',
                                             line_count)
                    else:
                        self.log_warning('No data found on line', line_count)
                else:
                    self.log_warning('Bad/Missing Timestamp on line',
                                     line_count)
            else:  # No FC Data is an expected occurrence, do not raise exception
                log.debug('No fuel cell data on line %d', line_count)

            # Read another line from the input file
            fuelcell_input_row = self._file_handle.readline()
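A toy walk-through of the terminator handling above, on a synthetic fragment rather than real fuel cell output:

    # fields, ':' terminator, checksum, then a space and a hex tail
    data_string = '12,34,56:102 A7'
    terminator_index = data_string.rfind(' ')   # start of the ' A7' tail
    the_data = data_string[:terminator_index]   # '12,34,56:102'
    the_data = the_data.replace(' ', '')        # drop any stray spaces
    actual_data, read_checksum = the_data.split(':')
    # actual_data == '12,34,56' and int(read_checksum) == 102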
Example #22
    def parse_file(self):
        """
        Parse through the file, pulling single lines and comparing to
        the established patterns, generating particles for data lines
        """
        for line in self._stream_handle:
            message = 'data line \n%s' % line
            log.debug(message)

            # First check for valid FLORT DJ DCL data
            # If this is a valid sensor data record,
            # use the extracted fields to generate a particle.
            sensor_match = SENSOR_DATA_MATCHER.match(line)

            if sensor_match is not None:
                self._particle_class._data_particle_map = INSTRUMENT_PARTICLE_MAP
                log.debug('FLORT DJ match found')
            else:
                log.debug('FLORT DJ match NOT found')
                # check for a match against the FLORT D data in a combined
                # CTDBP FLORT instrument record
                sensor_match = CTDBP_FLORT_MATCHER.match(line)

                if sensor_match is not None:
                    self._particle_class._data_particle_map = CTDBP_FLORT_PARTICLE_MAP
                    log.debug('CTDBP/FLORT match found')

            if sensor_match is not None:
                # FLORT data matched against one of the patterns
                log.debug('record found')

                # DCL Controller timestamp is the port_timestamp
                dcl_controller_timestamp = sensor_match.groups()[SENSOR_GROUP_TIMESTAMP]
                port_timestamp = dcl_time_to_ntp(dcl_controller_timestamp)

                if self._particle_class._data_particle_map == INSTRUMENT_PARTICLE_MAP:
                    # For valid FLORT DJ data, Instrument timestamp is the internal_timestamp
                    instrument_timestamp = sensor_match.groups()[SENSOR_GROUP_SENSOR_DATE] \
                                           + ' ' + sensor_match.groups()[SENSOR_GROUP_SENSOR_TIME]
                    internal_timestamp = timestamp_mmddyyhhmmss_to_ntp(instrument_timestamp)
                else:
                    # _data_particle_map is CTDBP_FLORT_PARTICLE_MAP
                    utc_time = formatted_timestamp_utc_time(sensor_match.groups()[CTDBP_FLORT_GROUP_DATE_TIME],
                                                            "%d %b %Y %H:%M:%S")
                    instrument_timestamp = ntplib.system_to_ntp_time(utc_time)
                    internal_timestamp = instrument_timestamp

                # using port_timestamp as preferred_ts because internal_timestamp is not accurate
                particle = self._extract_sample(self._particle_class,
                                                None,
                                                sensor_match.groups(),
                                                port_timestamp=port_timestamp,
                                                internal_timestamp=internal_timestamp,
                                                preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                # append the particle to the record buffer, even if
                # extraction returned None
                self._record_buffer.append(particle)

            # It's not a sensor data record, see if it's a metadata record.
            else:
                log.debug('No data recs found, check for meta record')

                # If it's a valid metadata record, ignore it.
                # Otherwise generate warning for unknown data.

                meta_match = METADATA_MATCHER.match(line)
                if meta_match is None:
                    error_message = 'Unknown data found in line %s' % line
                    log.warn(error_message)
                    self._exception_callback(UnexpectedDataException(error_message))
Example #23
    def parse_file(self):
        """
        Open and read the file and parse the data within; at the end of
        this method self._record_buffer will be filled with all the particles in the file.
        """

        while True:  # loop through file looking for beginning of an adcp data burst

            line = self._stream_handle.readline()  # READ NEXT LINE

            if line == "":
                break

            # Check if this is a DCL Log message
            dcl_log_match = DCL_LOG_MATCHER.match(line)
            if dcl_log_match:
                # verified to be a regular DCL Log. Discard & move to next line.
                continue  # skip to next line in outer loop

            line_match = SENSOR_TIME_MATCHER.match(line)
            if line_match is None:
                self.recov_exception_callback("Expected starting DCL Timestamp, received: %r" % line)
                continue  # skip to next line in outer loop

            matches = line_match.groups()
            sensor_data_list = []

            # Save the timestamp from the DCL controller log and its parts
            parsed_data = list(matches[SENSOR_GROUP_TIMESTAMP:SENSOR_TIME_SENSOR_DATE_TIME])
            port_timestamp = matches[SENSOR_GROUP_TIMESTAMP]
            port_timestamp = dcl_time_to_ntp(port_timestamp)

            # Get instrument_timestamp & ensemble_number
            parsed_data.append(matches[SENSOR_TIME_SENSOR_DATE_TIME])
            instrument_timestamp = matches[SENSOR_TIME_SENSOR_DATE_TIME]
            internal_timestamp = dcl_time_to_ntp(instrument_timestamp)
            parsed_data.append(matches[SENSOR_TIME_ENSEMBLE])

            line = self._stream_handle.readline()  # READ NEXT LINE

            line_match = SENSOR_HEAD_MATCHER.match(line)
            if line_match is None:
                self.recov_exception_callback("Expecting Heading, Pitch, & Roll data, received: %r" % line)
                continue  # skip to next line in outer loop

            matches = line_match.groups()
            # Get head, pitch, & roll
            parsed_data.append(matches[HEAD_HEADING])
            parsed_data.append(matches[HEAD_PITCH])
            parsed_data.append(matches[HEAD_ROLL])

            line = self._stream_handle.readline()  # READ NEXT LINE

            line_match = SENSOR_TEMP_MATCHER.match(line)
            if line_match is None:
                self.recov_exception_callback("Expecting Temperature, Speed of Sound, & BIT data,"
                                              " received: %r" % line)
                continue  # skip to next line in outer loop

            matches = line_match.groups()
            # Get temperature, speed of sound, & BIT values
            parsed_data.append(matches[TEMP_TEMP])
            parsed_data.append(matches[TEMP_SOS])

            binary_string = '{0:08b}'.format(int(matches[TEMP_HEX], 16))
            parsed_data.append(binary_string[3])
            parsed_data.append(binary_string[4])
            parsed_data.append(binary_string[6])

            line = self._stream_handle.readline()  # READ NEXT LINE

            line_match = IGNORE_HEADING_MATCHER.match(line)
            if line_match is None:
                self.recov_exception_callback("Expecting Header, received: %s" % line)
                continue  # skip to next line in outer loop

            # Start looking for sensor data
            while True:  # loop through all the velocity and echo data records

                line = self._stream_handle.readline()  # READ NEXT LINE

                line_match = SENSOR_DATA_MATCHER.match(line)
                if line_match is not None:
                    # Collect velocity data sextets and echo power quartets
                    sensor_data_list.append(line_match.groups()[SENSOR_DATA_BIN:])
                else:
                    try:
                        # Transpose velocity data sextets and echo power quartets
                        np_array = numpy.array(sensor_data_list)
                        parsed_data.extend(np_array.transpose().tolist()[1:])

                        # Get number of cells
                        parsed_data.append(sensor_data_list[-1][0])

                        particle = self._extract_sample(self._particle_class,
                                                        None,
                                                        parsed_data,
                                                        port_timestamp=port_timestamp,
                                                        internal_timestamp=internal_timestamp,
                                                        preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                        if particle is not None:
                            self._record_buffer.append(particle)

                    except Exception:
                        self.recov_exception_callback("Error parsing sensor data row,"
                                                      " received: %s" % line)

                    break  # exit inner loop once a particle has been produced
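The numpy step in the burst parser flips the row-per-bin records into per-column lists. A small demonstration with two synthetic rows:

    import numpy

    # two synthetic per-bin rows: (bin number, velocity, echo)
    sensor_data_list = [['1', '0.11', '83'],
                        ['2', '0.12', '85']]
    columns = numpy.array(sensor_data_list).transpose().tolist()
    # columns == [['1', '2'], ['0.11', '0.12'], ['83', '85']]
    # [1:] drops the bin-number column, and sensor_data_list[-1][0]
    # (here '2') supplies the number of cells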
Example #24
    def parse_file(self):
        """
        Parser for fuelcell_eng_dcl data.
        """
        line_count = 0

        # Read a single line from the input file
        fuelcell_input_row = self._file_handle.readline()

        # Read the file, one line at a time
        while fuelcell_input_row:

            line_count += 1

            # Check to see if this record contains fuel cell data
            if not NON_DATA_MATCHER.search(fuelcell_input_row):

                # Is the record properly time stamped?
                found_date_time_group = DATE_MATCHER.search(fuelcell_input_row)

                # If so, continue processing
                if found_date_time_group:

                    # Grab the time stamp from the data
                    date_time_group = found_date_time_group.group(1)

                    # Now get the fuel cell data from the input line
                    found_data = START_DATA_MATCHER.search(fuelcell_input_row)

                    # If an integer was found, followed by a comma, the line has fuel cell data.
                    if found_data:

                        data_string = fuelcell_input_row[found_data.start(1)+1:]

                        # Need to find the colon near the end of the line which marks the
                        # end of the actual fuel cell data. The colon marks the end of the
                        # fuel cell data followed by the checksum for that data. Following
                        # that there will be a space then a hexadecimal number. If any of those
                        # elements are missing, the data is suspect.
                        found_end = END_DATA_MATCHER.search(data_string)

                        if found_end:

                            # first find the last space in the data_string (start of the terminator)
                            terminator_index = data_string.rfind(' ')
                            the_data = data_string[:terminator_index]

                            # Now replace any extraneous spaces in the data
                            the_data = the_data.replace(' ', '')

                            data_plus_checksum = the_data.split(':')
                            actual_data = data_plus_checksum[0]
                            read_checksum = int(data_plus_checksum[1])

                            if self.good_checksum(actual_data, read_checksum):
                                the_fields = actual_data.split(',')

                                if self.good_field(the_fields):
                                    # DCL controller timestamp is the port_timestamp
                                    dcl_controller_timestamp = date_time_group
                                    port_timestamp = dcl_time_to_ntp(dcl_controller_timestamp)

                                    raw_data = [date_time_group]
                                    raw_data.extend(the_fields)

                                    particle = self._extract_sample(self._fuelcell_data_class,
                                                                    None,
                                                                    raw_data,
                                                                    port_timestamp=port_timestamp,
                                                                    preferred_ts=DataParticleKey.PORT_TIMESTAMP)

                                    self._record_buffer.append(particle)

                                else:
                                    self.log_warning('Improper format line', line_count)
                            else:
                                self.log_warning('Bad checksum line', line_count)
                        else:
                            self.log_warning('No terminator found on line', line_count)
                    else:
                        self.log_warning('No data found on line', line_count)
                else:
                    self.log_warning('Bad/Missing Timestamp on line', line_count)
            else:  # No FC Data is an expected occurrence, do not raise exception
                log.debug('No fuel cell data on line %d', line_count)

            # Read another line from the input file
            fuelcell_input_row = self._file_handle.readline()
Example #25
    def _process_instrument_data(self, working_record):
        """
        Determines which particle to produce, calls extract_sample to create the given particle
        """
        log.debug("PhsenAbcdefDclParser._process_instrument_data(): aggregate working_record size %s is %s",
                  len(working_record), working_record)

        # this size includes the leading * character
        instrument_record_length = 465

        # this size includes the leading * character
        control_record_length_without_voltage_battery = 39

        # this size includes the leading * character
        control_record_length_with_voltage_battery = 43

        data_type = self._determine_data_type(working_record)

        # DCL controller timestamp is the port_timestamp
        port_timestamp = dcl_time_to_ntp(self.latest_dcl_time)

        if data_type is not DataTypeEnum.UNKNOWN:

            # Create a tuple for the particle composed of the working record and latest DCL time
            # The tuple allows for DCL time to be available when EXTERNAL calls each particle's
            # build_parse_values method
            particle_data = (self.latest_dcl_time, working_record)

            if data_type is DataTypeEnum.INSTRUMENT:

                # Per the IDD, if the candidate data is not the proper size, throw a recoverable exception
                if len(working_record) == instrument_record_length:

                    # Create particle mule (to be used later to create the instrument particle)
                    particle = self._extract_sample(self._instrument_data_particle_class,
                                                    None,
                                                    particle_data,
                                                    port_timestamp=port_timestamp)

                    self._record_buffer.append(particle)
                else:
                    self._exception_callback(RecoverableSampleException(
                        "PhsenAbcdefDclParser._process_instrument_data(): "
                        "Throwing RecoverableSampleException, Size of data "
                        "record is not the length of an instrument data record"))

            elif data_type is DataTypeEnum.CONTROL:

                # Per the IDD, if the candidate data is not the proper size, throw a recoverable exception
                if len(working_record) == control_record_length_without_voltage_battery or \
                   len(working_record) == control_record_length_with_voltage_battery:

                    # Create particle mule (to be used later to create the metadata particle)
                    particle = self._extract_sample(self._metadata_particle_class,
                                                    None,
                                                    particle_data,
                                                    port_timestamp=port_timestamp)

                    self._record_buffer.append(particle)
                else:
                    log.warn("PhsenAbcdefDclParser._process_instrument_data(): "
                             "Size of data record is not the length of a control data record")

                    self._exception_callback(RecoverableSampleException(
                        "PhsenAbcdefDclParser._process_instrument_data(): "
                        "Throwing RecoverableSampleException, Size of data "
                        "record is not the length of a control data record"))
        else:
            log.warn("PhsenAbcdefDclParser._process_instrument_data(): "
                     "Throwing RecoverableSampleException, Record is neither instrument or control")

            self._exception_callback(RecoverableSampleException("PhsenAbcdefDclParser._process_instrument_data(): "
                                                                "Data Type is neither Control or Instrument"))