Example #1
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to.
        @throws DatasetParserException if there is a bad state structure
        """

        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if not (StateKey.POSITION in state_obj
                and StateKey.METADATA_EXTRACTED in state_obj):
            raise DatasetParserException(
                "Provided state is missing position or metadata extracted")

        self._state = state_obj
        self._read_state = state_obj

        # Clear the record buffer
        self._record_buffer = []

        # Need to seek the correct position in the file stream using the read state position.
        self._stream_handle.seek(self._read_state[StateKey.POSITION])

        # make sure we have cleaned the chunker out of old data
        self._chunker.clean_all_chunks()
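The state dictionary passed to this set_state variant must carry both keys it checks for. A minimal usage sketch, assuming StateKey.POSITION and StateKey.METADATA_EXTRACTED are the constants from the parser's module, parser is an already-constructed instance of this class, and the byte offset is illustrative only:

    resume_state = {
        StateKey.POSITION: 1024,            # byte offset into the file (illustrative)
        StateKey.METADATA_EXTRACTED: True,  # metadata particle has already been produced
    }
    parser.set_state(resume_state)  # clears the record buffer, seeks to the offset, and empties the chunker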
Example #2
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to.
        @throws DatasetParserException if there is a bad state structure
        """
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")

        if not (CtdmoStateKey.POSITION in state_obj):
            raise DatasetParserException('%s missing in state keys' %
                                         CtdmoStateKey.POSITION)

        if not (CtdmoStateKey.END_CONFIG in state_obj):
            raise DatasetParserException('%s missing in state keys' %
                                         CtdmoStateKey.END_CONFIG)

        if not (CtdmoStateKey.SERIAL_NUMBER in state_obj):
            raise DatasetParserException('%s missing in state keys' %
                                         CtdmoStateKey.SERIAL_NUMBER)

        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj

        self.input_file.seek(state_obj[CtdmoStateKey.POSITION])
Example #3
    def set_state(self, state_obj):
        """
        This method will set the state of the MmpCdsParser to a given state
        @param state_obj the updated state to use
        """
        log.debug("Attempting to set state to: %s", state_obj)
        # First need to make sure the state type is a dict
        if not isinstance(state_obj, dict):
            log.warn("Invalid state structure")
            raise DatasetParserException("Invalid state structure")
        # Then we need to make sure that the provided state includes particles returned information
        if not (StateKey.PARTICLES_RETURNED in state_obj):
            log.debug(PARTICLES_RETURNED_MISSING_ERROR_MSG)
            raise DatasetParserException(PARTICLES_RETURNED_MISSING_ERROR_MSG)

        # Clear out any pre-existing chunks
        self._chunker.clean_all_chunks()

        self._record_buffer = []

        # Set the state to the provided state
        self._state = state_obj

        # Always seek to the beginning of the buffer to read all records
        self._stream_handle.seek(0)
Example #4
    def set_state(self, state_obj):
        """
        initialize the state
        """
        log.trace("Attempting to set state to: %s", state_obj)
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if not (StateKey.POSITION in state_obj):
            raise DatasetParserException("Invalid state keys")
        self._chunker.clean_all_chunks()
        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj
        self._stream_handle.seek(state_obj[StateKey.POSITION])
Example #5
    def assert_particles(self, particles, yml_file, resource_path=None):
        """
        Assert that the contents of the particles match those in the results
        yaml file.

        @param particles either a DataParticle sub-class or particle dictionary
        to compare with the particles in the .yml file
        @param yml_file the .yml file name or full path containing particles
        to compare
        @param resource_path the path to the .yml file, used only if yml_file
        does not contain the full path
        """

        # see if .yml file has the full path
        if os.path.exists(yml_file):
            rs_file = yml_file
        # if not the full path, check if resource path was defined
        elif resource_path is not None:
            rs_file = os.path.join(resource_path, yml_file)
        # out of places to check for the file, raise an error
        else:
            raise DatasetParserException(
                'Test yaml file cannot be found to assert particles')

        # initialize result set with this .yml results file
        rs = ResultSet(rs_file)
        # compare results particles and assert that the output was successful
        self.assertTrue(rs.verify(particles),
                        msg=('Failed unit test data validation for file %s' %
                             yml_file))
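A hedged sketch of how assert_particles might be called from a parser unit test, assuming the test class holds an already-constructed parser in self.parser exposing the usual get_records(count) call, and that RESOURCE_PATH, the particle count, and the .yml file name are placeholders:

    def test_against_yml(self):
        # Hypothetical test: particle count, file name, and RESOURCE_PATH are illustrative.
        particles = self.parser.get_records(10)
        self.assert_particles(particles, 'expected_results.yml', RESOURCE_PATH)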
Example #6
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to.
        @throws DatasetParserException if there is a bad state structure
        """
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if not (StateKey.POSITION in state_obj):
            raise DatasetParserException("Invalid state keys")
        self._chunker.clean_all_chunks()
        self._record_buffer = []
        self._saved_header = None
        self._state = state_obj
        self._read_state = state_obj
        self._stream_handle.seek(state_obj[StateKey.POSITION])
Example #7
    def __init__(self, config, stream_handle, state, state_callback,
                 publish_callback, exception_callback, *args, **kwargs):

        #
        # Verify that the required parameters are in the parser configuration.
        #
        if not CtdmoStateKey.INDUCTIVE_ID in config:
            raise DatasetParserException("Parser config is missing %s" %
                                         CtdmoStateKey.INDUCTIVE_ID)

        #
        # No fancy sieve function needed for this parser.
        # File is ASCII with records separated by newlines.
        #
        super(CtdmoRecoveredCtParser, self).__init__(
            config, stream_handle, state,
            partial(StringChunker.regex_sieve_function,
                    regex_list=[REC_CT_RECORD_MATCHER]), state_callback,
            publish_callback, exception_callback, *args, **kwargs)

        #
        # Default the position within the file to the beginning
        # and set flags to indicate the end of Configuration has not been reached
        # and the serial number has not been found.
        #
        self._read_state = {
            CtdmoStateKey.POSITION: 0,
            CtdmoStateKey.END_CONFIG: False,
            CtdmoStateKey.SERIAL_NUMBER: None
        }
        self.input_file = stream_handle

        if state is not None:
            self.set_state(state)
Example #8
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to. Should be a dict with
        StateKey.UNPROCESSED_DATA, StateKey.IN_PROCESS_DATA, and
        StateKey.FILE_SIZE values. UNPROCESSED_DATA and IN_PROCESS_DATA are
        both arrays which contain an array of start and end indices for their
        respective types of data.
        @throws DatasetParserException if there is a bad state structure
        """
        if not isinstance(state_obj, dict):
            raise DatasetParserException(
                "Invalid state structure - not a dictionary")

        # Verify that all required state keys are present.
        if not ((StateKey.UNPROCESSED_DATA in state_obj) \
                  and (StateKey.IN_PROCESS_DATA in state_obj) \
                  and (StateKey.FILE_SIZE in state_obj)):
            raise DatasetParserException(
                "State key %s, %s or %s missing" %
                (StateKey.UNPROCESSED_DATA, StateKey.IN_PROCESS_DATA,
                 StateKey.FILE_SIZE))

        # store both the start and end point for this read of data within the file
        if state_obj[StateKey.UNPROCESSED_DATA] is None:
            self._position = [0, 0]
        else:
            self._position = [
                state_obj[StateKey.UNPROCESSED_DATA][0][START_IDX],
                state_obj[StateKey.UNPROCESSED_DATA][0][START_IDX]
            ]
        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj

        # it is possible to be in the middle of processing a packet.  Since we have to
        # process a whole packet, which may contain multiple samples, we have to
        # re-read the entire packet, then throw out the already received samples
        self._samples_to_throw_out = None
        self._mid_sample_packets = len(state_obj[StateKey.IN_PROCESS_DATA])
        if self._mid_sample_packets > 0 and state_obj[
                StateKey.IN_PROCESS_DATA][0][SAMPLES_RETURNED] > 0:
            self._samples_to_throw_out = state_obj[
                StateKey.IN_PROCESS_DATA][0][SAMPLES_RETURNED]

        # make sure we have cleaned the chunker out of old data so there are no wrap arounds
        self._chunker.clean_all_chunks()
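A minimal sketch of a state dictionary that passes the checks above, assuming each IN_PROCESS_DATA entry is laid out as [start, end, total_samples, samples_returned] (the index-3 lookup in the next example suggests this layout) and that the byte ranges and counts are illustrative only:

    state = {
        StateKey.UNPROCESSED_DATA: [[150, 300], [450, 600]],  # [start, end] byte ranges still to process
        StateKey.IN_PROCESS_DATA: [[150, 300, 5, 2]],         # assumed layout: [start, end, samples, samples_returned]
        StateKey.FILE_SIZE: 600,
    }
    parser.set_state(state)  # re-reads the in-process packet and throws out the 2 samples already returned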
Example #9
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to. Should be a dict with
        a StateKey.UNPROCESSED_DATA value, a StateKey.IN_PROCESS_DATA value,
        and a StateKey.TIMESTAMP value. The UNPROCESSED_DATA and IN_PROCESS_DATA
        values are both arrays which contain an array of start and end indices
        for their respective types of data.  The timestamp is an NTP4 format
        timestamp.
        @throws DatasetParserException if there is a bad state structure
        """
        log.debug("Setting state to: %s", state_obj)
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if not ((StateKey.UNPROCESSED_DATA in state_obj) and \
            (StateKey.IN_PROCESS_DATA in state_obj) and \
            (StateKey.TIMESTAMP in state_obj)):
            raise DatasetParserException("Invalid state keys")

        self._timestamp = state_obj[StateKey.TIMESTAMP]
        # store both the start and end point for this read of data within the file
        self._position = [
            state_obj[StateKey.UNPROCESSED_DATA][0][0],
            state_obj[StateKey.UNPROCESSED_DATA][0][0]
        ]
        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj

        # it is possible to be in the middle of processing a packet.  Since we have to
        # process a whole packet, which may contain multiple samples, we have to
        # re-read the entire packet, then throw out the already received samples
        self._samples_to_throw_out = None
        self._mid_sample_packets = len(state_obj[StateKey.IN_PROCESS_DATA])
        if self._mid_sample_packets > 0 and state_obj[
                StateKey.IN_PROCESS_DATA][0][3] > 0:
            self._samples_to_throw_out = state_obj[
                StateKey.IN_PROCESS_DATA][0][3]

        # make sure we have cleaned the chunker out of old data so there are no wrap arounds
        self._clean_all_chunker()

        self._new_seq_flag = True  # state has changed, start a new sequence

        # seek to the first unprocessed position
        self._stream_handle.seek(state_obj[StateKey.UNPROCESSED_DATA][0][0])
        log.debug('Seeking to %d', state_obj[StateKey.UNPROCESSED_DATA][0][0])
Example #10
    def _read_column_labels(self):
        """
        Read the next three lines to populate column data.

        1st Row (row 15 of file) == labels
        2nd Row (row 16 of file) == units
        3rd Row (row 17 of file) == column byte size

        Currently we are only able to support 3 label line rows.
        """

        # read the label line (should be at row 15 of the file at this point)
        label_list = self._stream_handle.readline().strip().split()
        self.num_columns = len(label_list)
        self._header_dict['labels'] = label_list

        # the m_present_time label is required to generate particles, raise an exception if it is not found
        if GliderParticleKey.M_PRESENT_TIME not in label_list:
            raise DatasetParserException(
                'The m_present_time label has not been found, which means the timestamp '
                'cannot be determined for any particles')

        # read the units line (should be at row 16 of the file at this point)
        data_unit_list = self._stream_handle.readline().strip().split()
        data_unit_list_length = len(data_unit_list)

        # read the number of bytes line (should be at row 17 of the file at this point)
        num_of_bytes_list = self._stream_handle.readline().strip().split()
        num_of_bytes_list_length = len(num_of_bytes_list)

        # number of labels for name, unit, and number of bytes must match
        if data_unit_list_length != self.num_columns or self.num_columns != num_of_bytes_list_length:
            raise DatasetParserException(
                "The number of columns in the labels row: %d, units row: %d, "
                "and number of bytes row: %d are not equal." %
                (self.num_columns, data_unit_list_length,
                 num_of_bytes_list_length))

        # if the number of columns from the header does not match that in the data, but the rest of the file
        # has the same number of columns in each line this is not a fatal error, just parse the columns that are present
        if self._header_dict['sensors_per_cycle'] != self.num_columns:
            msg = 'sensors_per_cycle from header %d does not match the number of data label columns %d' % \
                  (self._header_dict['sensors_per_cycle'], self.num_columns)
            self._exception_callback(SampleException(msg))

        log.debug("Label count: %d", self.num_columns)
Example #11
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to.
        @throws DatasetParserException if there is a bad state structure
        """
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")

        if not (Vel3dKWfpStateKey.POSITION in state_obj) or \
            not (Vel3dKWfpStateKey.RECORD_NUMBER in state_obj):

            raise DatasetParserException("Invalid state keys")

        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj
        self.input_file.seek(state_obj[Vel3dKWfpStateKey.POSITION])
Example #12
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to. Should be a dict with
        a StateKey.POSITION value. The position is the number of bytes into the file.
        @throws DatasetParserException if there is a bad state structure
        """
        log.trace("Attempting to set state to: %s", state_obj)
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if not (StateKey.POSITION in state_obj):
            raise DatasetParserException("Invalid state keys")

        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj

        # seek to it
        self._stream_handle.seek(state_obj[StateKey.POSITION])
Example #13
    def __init__(self, config, stream_handle, state, state_callback,
                 publish_callback, exception_callback, *args, **kwargs):

        super(CtdmoTelemeteredParser,
              self).__init__(config, stream_handle, state, self.sieve_function,
                             state_callback, publish_callback,
                             exception_callback, *args, **kwargs)

        if not CtdmoStateKey.INDUCTIVE_ID in config:
            raise DatasetParserException("Parser config is missing %s" %
                                         CtdmoStateKey.INDUCTIVE_ID)
Example #14
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to.
        @throws DatasetParserException if there is a bad state structure
        """
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if StateKey.POSITION not in state_obj:
            raise DatasetParserException("Missing state key %s" % StateKey.POSITION)
        if StateKey.START_OF_DATA not in state_obj:
            raise DatasetParserException("Missing state key %s" % StateKey.START_OF_DATA)

        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj
        self._chunker.clean_all_chunks()

        # seek to the position
        self._stream_handle.seek(state_obj[StateKey.POSITION])
Example #15
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to. 
        @throws DatasetParserException if there is a bad state structure
        """
        log.debug("Attempting to set state to: %s", state_obj)
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if not ((StateKey.POSITION in state_obj) and
                (StateKey.TIMESTAMP in state_obj)):
            raise DatasetParserException("Invalid state keys")
        self._timestamp = state_obj[StateKey.TIMESTAMP]
        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj

        # make sure the chunker is clean of old data
        self._clean_all_chunker()

        # seek to the position
        self._stream_handle.seek(state_obj[StateKey.POSITION])
Example #16
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to.
        @throws DatasetParserException if there is a bad state structure
        """
        if not isinstance(state_obj, dict):
            error_message = 'Invalid state structure'
            log.warn(error_message)
            raise DatasetParserException(error_message)

        for key in OptaaStateKey.list():
            if not key in state_obj:
                error_message = '%s missing in state keys' % key
                log.warn(error_message)
                raise DatasetParserException(error_message)

        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj

        self.input_file.seek(state_obj[OptaaStateKey.POSITION])
Example #17
    def __init__(self, config, stream_handle, state, sieve_fn, state_callback,
                 publish_callback, instrument_id):
        """
        @param config The configuration parameters to feed into the parser
        @param stream_handle An already open file-like filehandle
        @param state The location in the file to start parsing from.
           This reflects what has already been published.
        @param sieve_fn A sieve function that might be added to a handler
           to appropriately filter out the data
        @param state_callback The callback method from the agent driver
           (ultimately the agent) to call back when a state needs to be
           updated
        @param publish_callback The callback from the agent driver (and
           ultimately from the agent) where we send our sample particle to
           be published into ION
        @param instrument_id the text string indicating the instrument to
           monitor, can be 'CT', 'AD', 'FL', 'DO', or 'PH'
        """
        super(MflmParser,
              self).__init__(config, stream_handle, state, self.sieve_function,
                             state_callback, publish_callback)

        if instrument_id not in ['CT', 'AD', 'FL', 'DO', 'PH']:
            raise DatasetParserException('instrument id %s is not recognized'
                                         % instrument_id)
        self._instrument_id = instrument_id

        self._timestamp = 0.0
        self._position = [
            0, 0
        ]  # store both the start and end point for this read of data within the file
        self._record_buffer = []  # holds list of records
        # determine the EOF index
        self._stream_handle.seek(0)
        all_data = self._stream_handle.read()
        EOF = len(all_data)
        self._stream_handle.seek(0)
        self._new_seq_flag = True  # always start a new sequence on init
        self._chunk_sample_count = []
        self._chunk_new_seq = []
        self._samples_to_throw_out = None
        self._mid_sample_packets = 0
        self._read_state = {
            StateKey.TIMESTAMP: 0.0,
            StateKey.UNPROCESSED_DATA: [[0, EOF]],
            StateKey.IN_PROCESS_DATA: []
        }
        log.debug('Starting parser')

        if state:
            self.set_state(self._state)
Example #18
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to. Should be a list with
        a StateKey.POSITION value and StateKey.TIMESTAMP value. The position is
        number of bytes into the file, the timestamp is an NTP4 format timestamp.
        @throws DatasetParserException if there is a bad state structure
        """
        log.trace("Attempting to set state to: %s", state_obj)
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if not (StateKey.POSITION in state_obj):
            raise DatasetParserException("Invalid state keys")

        self._chunker.buffer = ""
        self._chunker.raw_chunk_list = []
        self._chunker.data_chunk_list = []
        self._chunker.nondata_chunk_list = []
        self._record_buffer = []
        self._state = state_obj
        self._read_state = state_obj

        self._stream_handle.seek(state_obj[StateKey.POSITION])
Example #19
    def set_state(self, state_obj):
        """
        Set the value of the state object for this parser
        @param state_obj The object to set the state to. 
        @throws DatasetParserException if there is a bad state structure
        """
        if not isinstance(state_obj, dict):
            raise DatasetParserException("Invalid state structure")
        if not (Vel3dKWfpStcStateKey.FIRST_RECORD in state_obj) or \
           not (Vel3dKWfpStcStateKey.POSITION in state_obj) or \
           not (Vel3dKWfpStcStateKey.VELOCITY_END in state_obj):
            raise DatasetParserException("Invalid state keys")

        #
        # Initialize parent data.
        #
        self._timestamp = 0.0
        self._record_buffer = []

        self._state = state_obj
        self._read_state = state_obj
        self.input_file.seek(self._read_state[Vel3dKWfpStcStateKey.POSITION],
                             0)
Example #20
    def __init__(self, config, stream_handle, state, state_callback,
                 publish_callback, exception_callback, *args, **kwargs):

        super(CtdmoRecoveredCoParser,
              self).__init__(config, stream_handle, state, self.sieve_function,
                             state_callback, publish_callback,
                             exception_callback, *args, **kwargs)

        #
        # Verify that the required parameters are in the parser configuration.
        #
        if not CtdmoStateKey.INDUCTIVE_ID in config:
            raise DatasetParserException("Parser config is missing %s" %
                                         CtdmoStateKey.INDUCTIVE_ID)
Example #21
    def _read_data(self, data_record):
        """
        Read in the column labels, data type, number of bytes of each
        data type, and the data from an ASCII glider data file.
        """
        log.debug("_read_data: Data Record: %s", data_record)

        data_dict = {}
        num_columns = self._header_dict['sensors_per_cycle']
        data_labels = self._header_dict['labels']
        #data_units = self._header_dict['data_units']
        num_bytes = self._header_dict['num_of_bytes']
        data = data_record.strip().split()
        log.trace("Split data: %s", data)
        if num_columns != len(data):
            raise DatasetParserException(
                'Glider data file does not have the same number of columns '
                'as described in the header.\n'
                'Described: %d, Actual: %d' % (num_columns, len(data)))

        # extract record to dictionary
        for ii in range(num_columns):
            log.trace("_read_data: index: %d label: %s, value: %s", ii,
                      data_labels[ii], data[ii])

            if (num_bytes[ii] == 1) or (num_bytes[ii] == 2):
                str2data = int
            elif (num_bytes[ii] == 4) or (num_bytes[ii] == 8):
                str2data = float

            # check to see if this is a latitude/longitude string
            if ('_lat' in data_labels[ii]) or ('_lon' in data_labels[ii]):
                # convert latitude/longitude strings to decimal degrees

                value = self._string_to_ddegrees(data[ii])
            else:
                value = str2data(data[ii])

            data_dict[data_labels[ii]] = {
                'Name': data_labels[ii],
                #'Units': data_units[ii],
                #'Number_of_Bytes': int(num_bytes[ii]),
                'Data': value
            }

        log.trace("Data dict parsed: %s", data_dict)
        return data_dict
Example #22
    def __init__(self, config, stream_handle, exception_callback, filename,
                 is_telemetered):

        super(OptaaDjDclParser, self).__init__(config, stream_handle,
                                               exception_callback)

        if is_telemetered:
            self.instrument_particle_class = OptaaDjDclTelemeteredInstrumentDataParticle
            self.metadata_particle_class = OptaaDjDclTelemeteredMetadataDataParticle
        else:
            self.instrument_particle_class = OptaaDjDclRecoveredInstrumentDataParticle
            self.metadata_particle_class = OptaaDjDclRecoveredMetadataDataParticle

        # Extract the start date and time from the filename and convert
        # it to the format expected for the output particle.
        # Calculate the ntp_time timestamp, the number of seconds since Jan 1, 1900,
        # based on the date and time from the filename.
        # This is the start time.  Timestamps for each particle are derived from
        # the start time.

        filename_match = FILENAME_MATCHER.search(filename)
        if filename_match is not None:
            self.start_date = \
                filename_match.group(GROUP_YEAR) + '-' + \
                filename_match.group(GROUP_MONTH) + '-' + \
                filename_match.group(GROUP_DAY) + ' ' + \
                filename_match.group(GROUP_HOUR) + ':' + \
                filename_match.group(GROUP_MINUTE) + ':' + \
                filename_match.group(GROUP_SECOND)
            timestamp = (int(filename_match.group(GROUP_YEAR)),
                         int(filename_match.group(GROUP_MONTH)),
                         int(filename_match.group(GROUP_DAY)),
                         int(filename_match.group(GROUP_HOUR)),
                         int(filename_match.group(GROUP_MINUTE)),
                         int(filename_match.group(GROUP_SECOND)), 0, 0, 0)

            # The timestamp for each particle is:
            # timestamp = start_time_from_file_name + (tn - t0)
            # where t0 is the time since power-up in the first record.

            elapsed_seconds = calendar.timegm(timestamp)
            self.ntp_time = ntplib.system_to_ntp_time(elapsed_seconds)

        else:
            error_message = 'Invalid filename %s' % filename
            log.warn(error_message)
            raise DatasetParserException(error_message)
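The start-time computation above can be checked in isolation. A small self-contained sketch, assuming the filename encoded 2014-07-29 01:30:45 UTC (illustrative values only); calendar.timegm converts a UTC time tuple to seconds since 1970, and ntplib.system_to_ntp_time shifts that to the NTP epoch of 1900:

    import calendar
    import ntplib

    timestamp = (2014, 7, 29, 1, 30, 45, 0, 0, 0)          # UTC time tuple built from the filename fields
    elapsed_seconds = calendar.timegm(timestamp)            # seconds since 1970-01-01 UTC
    ntp_time = ntplib.system_to_ntp_time(elapsed_seconds)   # seconds since 1900-01-01, the NTP epoch
    print(ntp_time)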
Example #23
def parse(basePythonCodePath, sourceFilePath, particleDataHdlrObj,
          serialNumToInductiveIdMapHandler):
    """
    This is the method called by Uframe
    :param basePythonCodePath This is the file system location of mi-dataset
    :param sourceFilePath This is the full path and filename of the file to be parsed
    :param particleDataHdlrObj Java Object to consume the output of the parser
    :param serialNumToInductiveIdMapHandler Handler used to look up the inductive ID for a serial number
    :return particleDataHdlrObj
    """

    log = get_logger()

    with open(sourceFilePath, 'r') as stream_handle:

        def exception_callback(exception):
            log.debug("Exception: %s", exception)
            particleDataHdlrObj.setParticleDataCaptureFailure()

        # extract the serial number from the file name
        serial_num = get_serial_num_from_filepath(sourceFilePath)

        # retrieve the inductive ID associated with the serial number
        induct_id = serialNumToInductiveIdMapHandler.getInductiveId(serial_num)

        if not induct_id:
            raise DatasetParserException(
                "Unable to obtain the inductive ID associated with serial num %d"
                % serial_num)

        parser_config = {
            DataSetDriverConfigKeys.PARTICLE_MODULE:
            'mi.dataset.parser.ctdmo_ghqr_sio',
            DataSetDriverConfigKeys.PARTICLE_CLASS:
            ['CtdmoGhqrRecoveredInstrumentDataParticle'],
            INDUCTIVE_ID_KEY:
            induct_id
        }

        parser = CtdmoGhqrRecoveredCtParser(parser_config, stream_handle,
                                            exception_callback)

        # create an instance of the concrete driver class defined below
        driver = DataSetDriver(parser, particleDataHdlrObj)
        driver.processFileStream()

    return particleDataHdlrObj
Example #24
def get_serial_num_from_filepath(filepath):
    """
    Parse the serial number from the file path
    :param filepath: The full path of the file to extract the serial number from the name
    :return: serial number
    """

    # get just the filename from the full path
    filename = os.path.basename(filepath)

    # match the filename, serial number is the first group
    filename_match = FILENAME_MATCHER.match(filename)

    # can't run parser without the serial number, raise an exception if it can't be found
    if not filename_match:
        raise DatasetParserException(
            "Unable to parse serial number from file name %s" % filename)

    # return serial number as an int
    return int(filename_match.group(1))
Example #25
    def _read_data(self, data_record):
        """
        Read in the column labels, data type, number of bytes of each
        data type, and the data from an ASCII glider data file.
        """
        data_dict = {}
        data_labels = self._header_dict['labels']
        data = data_record.strip().split()

        if self.num_columns != len(data):
            err_msg = "GliderParser._read_data(): Num Of Columns NOT EQUAL to Num of Data items: " + \
                      "Expected Columns= %s vs Actual Data= %s" % (self.num_columns, len(data))
            log.error(err_msg)
            raise DatasetParserException(err_msg)

        # extract record to dictionary
        for ii, value in enumerate(data):
            label = data_labels[ii]
            data_dict[label] = value

        return data_dict
Example #26
    def __init__(self, config, stream_handle, exception_callback):

        #
        # Verify that the required parameters are in the parser configuration.
        #
        if not INDUCTIVE_ID_KEY in config:
            raise DatasetParserException("Parser config is missing %s" %
                                         INDUCTIVE_ID_KEY)

        #
        # File is ASCII with records separated by newlines.
        #
        super(CtdmoGhqrRecoveredCtParser,
              self).__init__(config, stream_handle, exception_callback)

        #
        # set flags to indicate the end of Configuration has not been reached
        # and the serial number has not been found.
        #
        self._serial_number = None
        self._end_config = False

        self.input_file = stream_handle
Example #27
    def __init__(self,
                 config,
                 stream_handle,
                 exception_callback,
                 data_record_regex,
                 header_key_list=None,
                 ignore_matcher=None):
        """
        This method is a constructor that will instantiate a CsppParser object.
        @param config The configuration for this CsppParser parser
        @param stream_handle The handle to the data stream containing the cspp data
        @param exception_callback The function to call to report exceptions
        @param data_record_regex The data regex that should be used to obtain data records
        @param header_key_list The list of header keys expected within a header
        @param ignore_matcher A matcher from a regex to use to ignore expected junk lines
        """

        self._data_record_matcher = None
        self._header_and_first_data_record_matcher = None
        self._ignore_matcher = ignore_matcher

        # Ensure that we have a data regex
        if data_record_regex is None:
            log.warn('A data_record_regex is required, but None was given')
            raise DatasetParserException("Must provide a data_record_regex")
        else:
            self._data_record_matcher = re.compile(data_record_regex)

        # Build up the header state dictionary using the default header key list or the one that was provided
        self._header_state = {}

        if header_key_list is None:
            header_key_list = DEFAULT_HEADER_KEY_LIST

        for header_key in header_key_list:
            self._header_state[header_key] = None

        # Obtain the particle classes dictionary from the config data
        if DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT in config:
            particle_classes_dict = config.get(
                DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT)
            # Set the metadata and data particle classes to be used later

            if METADATA_PARTICLE_CLASS_KEY in particle_classes_dict and \
               DATA_PARTICLE_CLASS_KEY in particle_classes_dict:

                self._data_particle_class = particle_classes_dict.get(
                    DATA_PARTICLE_CLASS_KEY)
                self._metadata_particle_class = particle_classes_dict.get(
                    METADATA_PARTICLE_CLASS_KEY)
            else:
                log.warning(
                    'Configuration missing metadata or data particle class key in particle classes dict'
                )
                raise ConfigurationException(
                    'Configuration missing metadata or data particle class key in particle classes dict'
                )
        else:
            log.warning('Configuration missing particle classes dict')
            raise ConfigurationException(
                'Configuration missing particle classes dict')

        # Initialize the record buffer to an empty list
        self._record_buffer = []

        # Initialize the metadata flag
        self._metadata_extracted = False

        # Call the superclass constructor
        super(CsppParser, self).__init__(config, stream_handle,
                                         exception_callback)
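A minimal sketch of the configuration and constructor call this CsppParser variant expects, assuming DataSetDriverConfigKeys, the two particle-class keys, stream_handle, and exception_callback are available from the surrounding code base; the particle class names and DATA_REGEX are placeholders:

    config = {
        DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: {
            METADATA_PARTICLE_CLASS_KEY: SomeMetadataParticle,  # hypothetical metadata particle class
            DATA_PARTICLE_CLASS_KEY: SomeInstrumentParticle,    # hypothetical instrument particle class
        }
    }
    parser = CsppParser(config, stream_handle, exception_callback,
                        data_record_regex=DATA_REGEX,   # regex matching one data record (assumed defined)
                        header_key_list=None,           # fall back to DEFAULT_HEADER_KEY_LIST
                        ignore_matcher=None)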
Example #28
    def __init__(self,
                 config,
                 state,
                 stream_handle,
                 state_callback,
                 publish_callback,
                 exception_callback,
                 data_record_regex,
                 header_key_list=None,
                 ignore_matcher=None,
                 *args,
                 **kwargs):
        """
        This method is a constructor that will instantiate a CsppParser object.
        @param config The configuration for this CsppParser parser
        @param state The state the CsppParser should use to initialize itself
        @param stream_handle The handle to the data stream containing the cspp data
        @param state_callback The function to call upon detecting state changes
        @param publish_callback The function to call to provide particles
        @param exception_callback The function to call to report exceptions
        @param data_record_regex The data regex that should be used to obtain data records
        @param header_key_list The list of header keys expected within a header
        @param ignore_matcher A matcher from a regex to use to ignore expected junk lines
        """

        self._data_record_matcher = None
        self._header_and_first_data_record_matcher = None
        self._ignore_matcher = ignore_matcher

        # Ensure that we have a data regex
        if data_record_regex is None:
            log.warn('A data_record_regex is required, but None was given')
            raise DatasetParserException("Must provide a data_record_regex")
        else:
            self._data_record_matcher = re.compile(data_record_regex)

        # Build up the header state dictionary using the default header key list or the one that was provided
        self._header_state = {}

        if header_key_list is None:
            header_key_list = DEFAULT_HEADER_KEY_LIST

        for header_key in header_key_list:
            self._header_state[header_key] = None

        # Obtain the particle classes dictionary from the config data
        if DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT in config:
            particle_classes_dict = config.get(
                DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT)
            # Set the metadata and data particle classes to be used later
            if METADATA_PARTICLE_CLASS_KEY in particle_classes_dict and \
            DATA_PARTICLE_CLASS_KEY in particle_classes_dict:
                self._data_particle_class = particle_classes_dict.get(
                    DATA_PARTICLE_CLASS_KEY)
                self._metadata_particle_class = particle_classes_dict.get(
                    METADATA_PARTICLE_CLASS_KEY)
            else:
                log.warning(
                    'Configuration missing metadata or data particle class key in particle classes dict'
                )
                raise ConfigurationException(
                    'Configuration missing metadata or data particle class key in particle classes dict'
                )
        else:
            log.warning('Configuration missing particle classes dict')
            raise ConfigurationException(
                'Configuration missing particle classes dict')

        # Initialize the record buffer to an empty list
        self._record_buffer = []

        # Initialize the read state
        self._read_state = {
            StateKey.POSITION: 0,
            StateKey.METADATA_EXTRACTED: False
        }

        # Call the superclass constructor
        super(CsppParser, self).__init__(
            config, stream_handle, state,
            partial(StringChunker.regex_sieve_function,
                    regex_list=[SIEVE_MATCHER]), state_callback,
            publish_callback, exception_callback, *args, **kwargs)

        # If provided a state, set it.  This needs to be done post superclass __init__
        if state:
            self.set_state(state)
Example #29
    def _read_file_definition(self):
        """
        Read the first 14 lines of the data file for the file definitions; values
        are colon-delimited key/value pairs. The pairs are parsed and stored in
        the _header_dict member.
        """
        row_count = 0
        #
        # THIS METHOD ASSUMES A 14 ROW HEADER
        # If the number of header row lines in the glider ASCII input file changes from 14,
        # this method will NOT WORK
        num_hdr_lines = 14

        header_pattern = r'(.*): (.*)$'
        header_re = re.compile(header_pattern)

        line = self._stream_handle.readline()

        while line and row_count < num_hdr_lines:

            match = header_re.match(line)

            if match:
                key = match.group(1)
                value = match.group(2)
                value = value.strip()

                # update num_hdr_lines based on the header info.
                if key == 'num_ascii_tags':
                    # this key has a required value of 14, otherwise we don't know how to parse the file
                    if int(value) != num_hdr_lines:
                        raise DatasetParserException(
                            "Header must be %d rows, but it is %s" %
                            (num_hdr_lines, value))

                elif key == 'num_label_lines':
                    # this key has a required value of 3, otherwise we don't know how to parse the file
                    if int(value) != 3:
                        raise DatasetParserException(
                            "There must be 3 Label lines from the header for this parser"
                        )

                elif key == 'sensors_per_cycle':
                    # save for future use
                    self._header_dict[key] = int(value)

                elif key in [
                        'filename_label', 'mission_name', 'fileopen_time'
                ]:
                    # create a dictionary of these 3 key/value pairs strings from
                    # the header rows that need to be saved for future use
                    self._header_dict[key] = value

            else:
                log.warn("Failed to parse header row: %s.", line)

            row_count += 1
            # only read the header lines in this method so make sure we stop
            if row_count < num_hdr_lines:
                line = self._stream_handle.readline()

        if row_count < num_hdr_lines:
            log.error('Not enough data lines for a full header')
            raise DatasetParserException(
                'Not enough data lines for a full header')
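A hedged sketch of the colon-delimited header rows this method parses, using the same regex it compiles; the line contents are illustrative, but num_ascii_tags and num_label_lines must be 14 and 3 respectively for the parser to proceed:

    import re

    header_re = re.compile(r'(.*): (.*)$')

    # Hypothetical header rows; a real glider ASCII header has 14 of them.
    for line in ['num_ascii_tags: 14', 'num_label_lines: 3', 'sensors_per_cycle: 5']:
        match = header_re.match(line)
        key, value = match.group(1), match.group(2).strip()
        print(key, value)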
Example #30
    def __init__(self,
                 config,
                 stream_handle,
                 state,
                 state_callback,
                 publish_callback,
                 exception_callback,
                 filename,
                 instrument_particle_class,
                 metadata_particle_class,
                 *args, **kwargs):

        super(OptaaDjDclParser, self).__init__(config,
                                          stream_handle,
                                          state,
                                          self.sieve_function,
                                          state_callback,
                                          publish_callback,
                                          exception_callback,
                                          *args,
                                          **kwargs)

        self.input_file = stream_handle

        # If there's an existing state, update to it.
        # Otherwise default the position within the file to the beginning
        # and metadata particle not having been generated.

        if state is not None:
            self.set_state(state)
        else:
            self.set_state({OptaaStateKey.POSITION: 0,
                            OptaaStateKey.METADATA_GENERATED: False,
                            OptaaStateKey.TIME_SINCE_POWER_UP: 0.0})

        # Extract the start date and time from the filename and convert
        # it to the format expected for the output particle.
        # Calculate the ntp_time timestamp, the number of seconds since Jan 1, 1900,
        # based on the date and time from the filename.
        # This is the start time.  Timestamps for each particle are derived from
        # the start time.

        filename_match = FILENAME_MATCHER.match(filename)
        if filename_match is not None:
            self.start_date = \
                filename_match.group(GROUP_YEAR) + '-' + \
                filename_match.group(GROUP_MONTH) + '-' + \
                filename_match.group(GROUP_DAY) + ' ' + \
                filename_match.group(GROUP_HOUR) + ':' + \
                filename_match.group(GROUP_MINUTE) + ':' + \
                filename_match.group(GROUP_SECOND)
            timestamp = (
                int(filename_match.group(GROUP_YEAR)),
                int(filename_match.group(GROUP_MONTH)),
                int(filename_match.group(GROUP_DAY)),
                int(filename_match.group(GROUP_HOUR)),
                int(filename_match.group(GROUP_MINUTE)),
                int(filename_match.group(GROUP_SECOND)),
                0, 0, 0)

            # The timestamp for each particle is:
            # timestamp = start_time_from_file_name + (tn - t0)
            # where t0 is the time since power-up in the first record.

            elapsed_seconds = calendar.timegm(timestamp)
            self.ntp_time = ntplib.system_to_ntp_time(elapsed_seconds) - \
                self._read_state[OptaaStateKey.TIME_SINCE_POWER_UP]

        else:
            error_message = 'Invalid filename %s' % filename
            log.warn(error_message)
            raise DatasetParserException(error_message)

        # Save the names of the particle classes to be generated.

        self.instrument_particle_class = instrument_particle_class
        self.metadata_particle_class = metadata_particle_class