def test_bad_data(self):
        """
        Verify that a corrupted data record is skipped, using both the
        recovered and the telemetered parser configurations.
        """

        def check_bad_data(config, expected_yml):
            # Parse the corrupted input with the given configuration and
            # compare the first 2 particles against the expected yml file.
            bad_data_path = os.path.join(RESOURCE_PATH, '11079419_BAD_PPB_OCR.txt')
            with open(bad_data_path, 'rU') as stream:
                log.debug(self.exception_callback_value)

                bad_data_parser = SpkirAbjCsppParser(
                    config, stream, self.exception_callback)

                found = bad_data_parser.get_records(2)

                self.assert_particles(found, expected_yml, RESOURCE_PATH)

        # The first data record in the input file is corrupted and is ignored,
        # so the first 2 particles are expected to be the metadata particle and
        # the instrument particle from the data record after the corrupted one.
        check_bad_data(self._recov_config, 'bad_data_record_recov.yml')
        check_bad_data(self._telem_config, 'bad_data_record_telem.yml')
    def test_simple(self):
        """
        Parse the sample file with the recovered and then the telemetered
        configuration and compare the particles produced with the expected
        results stored in the matching yml files.
        """

        def check_simple(config, expected_yml):
            # Request 20 particles from a freshly opened copy of the input and
            # verify them against the given yml file.
            sample_path = os.path.join(RESOURCE_PATH, '11079364_PPD_OCR.txt')
            with open(sample_path, 'rU') as stream:
                simple_parser = SpkirAbjCsppParser(
                    config, stream, self.exception_callback)

                found = simple_parser.get_records(20)

                log.debug("*** test_simple Num particles %s", len(found))

                self.assert_particles(found, expected_yml, RESOURCE_PATH)

        # Note: the recovered and telemetered parser and particles share a
        # common implementation; both configurations are exercised here.
        check_simple(self._recov_config, '11079364_PPD_OCR_recov.yml')
        check_simple(self._telem_config, '11079364_PPD_OCR_telem.yml')
    def test_extra_data(self):
        """
        Ensure that data records carrying extra data values are skipped, for
        both the recovered and the telemetered parser configurations.
        """

        def check_extra_data(config, expected_yml):
            # Parse the corrupted input with the given configuration, verify
            # the reported exceptions, then compare the first 2 particles
            # against the expected yml file.
            with open(
                    os.path.join(RESOURCE_PATH, '11079364_EXTRA_DATA_PPD_OCR.txt'),
                    'r') as file_handle:

                log.info(self.exception_callback_value)

                parser = SpkirAbjCsppParser(config, file_handle,
                                            self.exception_callback)

                particles = parser.get_records(2)

                # the two corrupted records should have reported two exceptions
                self.assertEqual(len(self.exception_callback_value), 2)

                for exception in self.exception_callback_value:
                    self.assertTrue(
                        isinstance(exception, RecoverableSampleException))

                # expect to see a recoverable sample exception in the log
                log.debug('TEST EXTRA DATA exception call back is %s',
                          self.exception_callback_value)

                self.assert_particles(particles, expected_yml, RESOURCE_PATH)

        # the first 2 data records in this file are corrupted by adding
        # additional data values separated by tabs and will be ignored;
        # we expect the first 2 particles to be the metadata particle and the
        # instrument particle from the data record after the corrupted ones
        check_extra_data(self._recov_config, 'extra_data_values_recov.yml')

        # start the telemetered pass with no exceptions recorded, otherwise
        # the count check would see the exceptions from the recovered pass
        self.exception_callback_value = []

        check_extra_data(self._telem_config, 'extra_data_values_telem.yml')
Example #4
0
    def test_bad_data(self):
        """
        Ensure that bad data is skipped when it exists.
        """

        # the first data record in this file is corrupted and will be ignored
        # we expect the first 2 particles to be the metadata particle and the
        # instrument particle from the data record after the corrupted one

        file_path = os.path.join(RESOURCE_PATH, '11079419_BAD_PPB_OCR.txt')

        # the with-block closes the file even when an assertion below fails
        with open(file_path, 'rb') as stream_handle:

            log.info(self.exception_callback_value)

            parser = SpkirAbjCsppParser(
                self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED), None,
                stream_handle, self.state_callback, self.pub_callback,
                self.exception_callback)

            particles = parser.get_records(2)

            expected_results = self.get_dict_from_yml('bad_data_record.yml')

            # assertEqual reports both values on failure
            self.assertEqual(len(particles), 2)

            for i, particle in enumerate(particles):
                self.assert_result(expected_results['data'][i], particle)
Example #5
0
    def test_get_many(self):
        """
        Read test data and pull out multiple data particles at one time.
        Assert that the number of particles is the one we expected.
        """
        file_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

        # the with-block closes the file even when the assertion below fails
        with open(file_path, 'r') as stream_handle:

            # Note: since the recovered and telemetered parser and particles
            # are common to each other, testing one is sufficient, will be
            # completely tested in driver tests

            parser = SpkirAbjCsppParser(
                self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED), None,
                stream_handle, self.state_callback, self.pub_callback,
                self.exception_callback)

            # try to get 2000 particles, there are only 1623 data records
            # so should get 1624 including the meta data
            particles = parser.get_records(2000)

            log.debug("*** test_get_many Num particles %s", len(particles))
            self.assertEqual(len(particles), 1624)
Example #6
0
    def test_simple(self):
        """
        Read test data and pull out data particles
        Assert that the results are those we expected.
        """
        file_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

        # the with-block closes the file even when an assertion below fails
        with open(file_path, 'r') as stream_handle:

            # Note: since the recovered and telemetered parser and particles
            # are common to each other, testing one is sufficient, will be
            # completely tested in driver tests

            parser = SpkirAbjCsppParser(
                self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED), None,
                stream_handle, self.state_callback, self.pub_callback,
                self.exception_callback)

            particles = parser.get_records(20)

            log.debug("*** test_simple Num particles %s", len(particles))

            # without this check the comparison loop below would pass
            # vacuously if the parser returned no particles
            self.assertEqual(len(particles), 20)

            # expected results for the recovered particles
            test_data = self.get_dict_from_yml('11079419_PPB_OCR_recov.yml')

            # check all the values against expected results.
            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)
    def test_get_many(self):
        """
        Request more particles than the input file can supply in a single
        call, for the recovered and then the telemetered configuration, and
        compare what comes back with the expected yml results.
        """

        def check_get_many(config, expected_yml):
            # Ask a parser built on a freshly opened copy of the input for
            # 2000 particles and verify them against the given yml file.
            many_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')
            with open(many_path, 'r') as stream:
                many_parser = SpkirAbjCsppParser(
                    config, stream, self.exception_callback)

                # try to get 2000 particles, there are only 1623 data records
                # so should get 1624 including the meta data
                found = many_parser.get_records(2000)

                log.debug("*** test_get_many Num particles %s", len(found))

                self.assert_particles(found, expected_yml, RESOURCE_PATH)

        # Note: the recovered and telemetered parser and particles share a
        # common implementation; both configurations are exercised here.
        check_get_many(self._recov_config, '11079419_PPB_OCR_recov.yml')
        check_get_many(self._telem_config, '11079419_PPB_OCR_telem.yml')
Example #8
0
    def _build_parser(self, parser_state, stream_in, data_key):
        """
        Build and return a SpkirAbjCsppParser for the given data key.

        The config stored under data_key is updated in place with the
        particle module and the telemetered or recovered particle classes
        before the parser is created. ConfigurationException is raised when
        no config is stored for data_key, or when data_key is neither the
        telemetered nor the recovered key.
        """

        config = self._parser_config.get(data_key)

        if config is None:
            log.warn(
                'Parser config does not exist for key = %s.  Not building parser',
                data_key)
            raise ConfigurationException

        # choose the particle classes that match the kind of data
        if data_key == DataTypeKey.SPKIR_ABJ_CSPP_TELEMETERED:
            metadata_class = SpkirAbjCsppMetadataTelemeteredDataParticle
            instrument_class = SpkirAbjCsppInstrumentTelemeteredDataParticle
        elif data_key == DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED:
            metadata_class = SpkirAbjCsppMetadataRecoveredDataParticle
            instrument_class = SpkirAbjCsppInstrumentRecoveredDataParticle
        else:
            log.warn('Invalid Data_Key %s.  Not building parser', data_key)
            raise ConfigurationException

        particle_settings = {
            DataSetDriverConfigKeys.PARTICLE_MODULE:
            'mi.dataset.parser.spkir_abj_cspp',
            DataSetDriverConfigKeys.PARTICLE_CLASS: None,
            DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: {
                METADATA_PARTICLE_CLASS_KEY: metadata_class,
                DATA_PARTICLE_CLASS_KEY: instrument_class,
            }
        }
        config.update(particle_settings)

        log.debug("_build_parser  Config: %s", config)

        def save_state(state, ingested):
            # forward the parser's state callback together with the data key
            return self._save_parser_state(state, data_key, ingested)

        return SpkirAbjCsppParser(
            config, parser_state, stream_in, save_state,
            self._data_callback, self._sample_exception_callback)
    def _build_parser(self, stream_handle):
        """
        Build and return a SpkirAbjCsppParser that reads stream_handle and is
        configured with the recovered metadata and instrument particle classes.
        """

        recovered_classes = {
            METADATA_PARTICLE_CLASS_KEY: SpkirAbjCsppMetadataRecoveredDataParticle,
            DATA_PARTICLE_CLASS_KEY: SpkirAbjCsppInstrumentRecoveredDataParticle,
        }

        recovered_config = {
            DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.spkir_abj_cspp',
            DataSetDriverConfigKeys.PARTICLE_CLASS: None,
            DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: recovered_classes,
        }

        return SpkirAbjCsppParser(recovered_config, stream_handle,
                                  self._exception_callback)
Example #10
0
    def test_set_state(self):
        """
        Test changing to a new state after initializing the parser and
        reading data, as if new data has been found and the state has
        changed
        """

        file_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

        # NOTE(review): the original comment named 11079419_PPB_OCR_20.yml and
        # described it as holding the metadata and the first 19 instrument
        # particles; the file actually loaded is 11079419_PPB_OCR_recov.yml
        expected_results = self.get_dict_from_yml('11079419_PPB_OCR_recov.yml')

        # the with-block closes the file even when an assertion below fails
        with open(file_path, 'r') as stream_handle:

            parser = SpkirAbjCsppParser(
                self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED), None,
                stream_handle, self.state_callback, self.pub_callback,
                self.exception_callback)

            particles = parser.get_records(2)

            log.debug("Num particles: %s", len(particles))

            self.assertEqual(len(particles), 2)

            for i, particle in enumerate(particles):
                self.assert_result(expected_results['data'][i], particle)

            # NOTE(review): the original comment said position 3656 is the
            # byte at the start of the 18th data record, but the value used is
            # 3769 - confirm which one is stale
            new_state = {
                StateKey.POSITION: 3769,
                StateKey.METADATA_EXTRACTED: True
            }

            parser.set_state(new_state)

            particles = parser.get_records(2)

            self.assertEqual(len(particles), 2)

            # offset in the expected results
            offset = 18
            for i, particle in enumerate(particles):
                self.assert_result(expected_results['data'][i + offset],
                                   particle)
Example #11
0
    def test_extra_data(self):
        """
        Ensure that bad data is skipped when it exists.
        """

        # the first 2 data records in this file are corrupted by adding
        # additional data values separated by tabs and will be ignored
        # we expect the first 2 particles to be the metadata particle and the
        # instrument particle from the data record after the corrupted ones

        file_path = os.path.join(RESOURCE_PATH,
                                 '11079364_EXTRA_DATA_PPD_OCR.txt')

        # the with-block closes the file even when an assertion below fails
        with open(file_path, 'rb') as stream_handle:

            log.info(self.exception_callback_value)

            parser = SpkirAbjCsppParser(
                self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED), None,
                stream_handle, self.state_callback, self.pub_callback,
                self.exception_callback)

            particles = parser.get_records(2)

            # identity comparison is the idiomatic None check
            self.assertTrue(self.exception_callback_value is not None)

            self.assertTrue(
                isinstance(self.exception_callback_value,
                           RecoverableSampleException))

            # expect to see a recoverable sample exception in the log
            log.debug('TEST EXTRA DATA exception call back is %s',
                      self.exception_callback_value)

            expected_results = self.get_dict_from_yml('extra_data_values.yml')

            self.assertEqual(len(particles), 2)

            # since the first two records were corrupted the first records
            # received should be the metadata particle with the timestamp of
            # the 3rd data row and the instrument particle from the 3rd row

            for i, particle in enumerate(particles):
                self.assert_result(expected_results['data'][i], particle)
Example #12
0
    def create_yml(self):
        """
        This utility creates a yml file from the first 20 particles that the
        recovered parser produces for 11079419_PPB_OCR.txt
        """

        # the with-block closes the file even if parsing or writing the yml
        # file raises
        with open(os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt'),
                  'r') as stream_handle:

            parser = SpkirAbjCsppParser(
                self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED), None,
                stream_handle, self.state_callback, self.pub_callback,
                self.exception_callback)

            particles = parser.get_records(20)

            self.particle_to_yml(particles, '11079419_PPB_OCR_recov.yml')
Example #13
0
    def test_mid_state_start(self):
        """
        This test makes sure that we retrieve the correct particles upon starting with an offset state.
        """

        file_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

        # position 1410 is the end of the first data record, which would have
        # produced the metadata particle and the first instrument particle
        initial_state = {
            StateKey.POSITION: 1410,
            StateKey.METADATA_EXTRACTED: True
        }

        # the with-block closes the file even when an assertion below fails
        with open(file_path, 'rb') as stream_handle:

            parser = SpkirAbjCsppParser(
                self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED),
                initial_state, stream_handle, self.state_callback,
                self.pub_callback, self.exception_callback)

            # expect to get the 2nd and 3rd instrument particles next
            particles = parser.get_records(2)

            log.debug("Num particles: %s", len(particles))

            self.assertEqual(len(particles), 2)

            expected_results = self.get_dict_from_yml('mid_state_start.yml')

            for i, particle in enumerate(particles):
                self.assert_result(expected_results['data'][i], particle)

            # now expect the state to reflect the records consumed so far,
            # with the metadata already sent
            # NOTE(review): the original comment called this the end of the
            # "4 data record" although only the 2nd and 3rd instrument
            # particles were requested above - confirm which record 1704 ends
            the_new_state = {
                StateKey.POSITION: 1704,
                StateKey.METADATA_EXTRACTED: True
            }
            log.debug("********** expected state: %s", the_new_state)
            log.debug("******** new parser state: %s", parser._state)

            # assertEqual reports both states on failure
            self.assertEqual(parser._state, the_new_state)