def test_bad_data(self):
    """
    Verify that a corrupted data record is skipped by the parser.

    The first data record of the input file is corrupted and is expected
    to be ignored, so the two particles requested should be the metadata
    particle and the instrument particle built from the data record that
    follows the corrupted one. The check is run once with the recovered
    configuration and once with the telemetered configuration.
    """
    input_path = os.path.join(RESOURCE_PATH, '11079419_BAD_PPB_OCR.txt')

    # (parser configuration, yml file holding the expected particles)
    passes = (
        (self._recov_config, 'bad_data_record_recov.yml'),
        (self._telem_config, 'bad_data_record_telem.yml'),
    )

    for parser_config, expected_yml in passes:
        # NOTE(review): the 'U' open mode no longer exists in Python 3.11+;
        # revisit if this suite is ever run on a newer interpreter.
        with open(input_path, 'rU') as file_handle:
            log.debug(self.exception_callback_value)

            parser = SpkirAbjCsppParser(parser_config,
                                        file_handle,
                                        self.exception_callback)
            particles = parser.get_records(2)

            self.assert_particles(particles, expected_yml, RESOURCE_PATH)
def test_simple(self):
    """
    Parse the sample file and compare the particles to the expected ones.

    Up to 20 particles are requested from the parser and checked against
    the matching yml file, first using the recovered configuration and
    then using the telemetered configuration.
    """
    input_path = os.path.join(RESOURCE_PATH, '11079364_PPD_OCR.txt')

    # (parser configuration, yml file holding the expected particles)
    passes = (
        (self._recov_config, '11079364_PPD_OCR_recov.yml'),
        (self._telem_config, '11079364_PPD_OCR_telem.yml'),
    )

    for parser_config, expected_yml in passes:
        # NOTE(review): the 'U' open mode no longer exists in Python 3.11+;
        # revisit if this suite is ever run on a newer interpreter.
        with open(input_path, 'rU') as file_handle:
            parser = SpkirAbjCsppParser(parser_config,
                                        file_handle,
                                        self.exception_callback)
            particles = parser.get_records(20)

            log.debug("*** test_simple Num particles %s", len(particles))

            self.assert_particles(particles, expected_yml, RESOURCE_PATH)
def test_extra_data(self):
    """
    Ensure that data records carrying extra values are skipped.

    The first 2 data records in the input file are corrupted by additional
    data values separated by tabs and are expected to be ignored. The two
    particles requested should therefore be the metadata particle and the
    instrument particle from the data record after the corrupted ones, and
    the exception callback should have received exactly two
    RecoverableSampleException instances. The check is run for both the
    recovered and the telemetered configuration.
    """
    def _check_extra_data(parser_config, expected_yml):
        # Run one parse pass and verify both the reported exceptions and
        # the resulting particles.
        input_path = os.path.join(RESOURCE_PATH, '11079364_EXTRA_DATA_PPD_OCR.txt')
        with open(input_path, 'r') as file_handle:
            log.info(self.exception_callback_value)

            parser = SpkirAbjCsppParser(parser_config,
                                        file_handle,
                                        self.exception_callback)
            particles = parser.get_records(2)

            # one exception is expected per corrupted record
            self.assertEqual(len(self.exception_callback_value), 2)
            for exception in self.exception_callback_value:
                self.assertIsInstance(exception, RecoverableSampleException)

            # expect to see a recoverable sample exception in the log
            log.debug('TEST EXTRA DATA exception call back is %s',
                      self.exception_callback_value)

            self.assert_particles(particles, expected_yml, RESOURCE_PATH)

    _check_extra_data(self._recov_config, 'extra_data_values_recov.yml')

    # Clear the exceptions collected by the recovered pass so the count
    # assertion of the telemetered pass starts from an empty list.
    self.exception_callback_value = []

    _check_extra_data(self._telem_config, 'extra_data_values_telem.yml')
def test_bad_data(self):
    """
    Ensure that bad data is skipped when it exists.

    The first data record in the input file is corrupted and is expected
    to be ignored, so the first 2 particles should be the metadata
    particle and the instrument particle from the data record after the
    corrupted one.
    """
    file_path = os.path.join(RESOURCE_PATH, '11079419_BAD_PPB_OCR.txt')

    # The context manager guarantees the file is closed even when one of
    # the assertions below fails.
    with open(file_path, 'rb') as stream_handle:
        log.info(self.exception_callback_value)

        parser = SpkirAbjCsppParser(
            self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        particles = parser.get_records(2)

        expected_results = self.get_dict_from_yml('bad_data_record.yml')

        self.assertEqual(len(particles), 2)

        # Index into the expected data (rather than zip) so a yml file
        # with too few entries still fails loudly.
        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i], particle)
def test_get_many(self):
    """
    Read test data and pull out multiple data particles at one time.
    Assert that the number of particles returned is the one expected.
    """
    file_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

    # The context manager guarantees the file is closed even when the
    # assertion below fails.
    with open(file_path, 'r') as stream_handle:
        # Only the recovered configuration is exercised here; the original
        # author's note states the recovered and telemetered parser and
        # particles are common to each other and are completely tested in
        # the driver tests.
        parser = SpkirAbjCsppParser(
            self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        # try to get 2000 particles, there are only 1623 data records
        # so should get 1624 including the meta data
        particles = parser.get_records(2000)

        log.debug("*** test_get_many Num particles %s", len(particles))

        self.assertEqual(len(particles), 1624)
def test_simple(self):
    """
    Read test data and pull out data particles.
    Assert that the results are those we expected.
    """
    file_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

    # The context manager guarantees the file is closed even when one of
    # the comparisons below fails.
    with open(file_path, 'r') as stream_handle:
        # Only the recovered configuration is exercised here; the original
        # author's note states the recovered and telemetered parser and
        # particles are common to each other and are completely tested in
        # the driver tests.
        parser = SpkirAbjCsppParser(
            self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        particles = parser.get_records(20)

        log.debug("*** test_simple Num particles %s", len(particles))

        # expected particles; the first one should be the metadata
        # particle (recovered)
        test_data = self.get_dict_from_yml('11079419_PPB_OCR_recov.yml')

        # check all the values against expected results; indexing (rather
        # than zip) keeps a too-short yml file a hard failure
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)
def test_get_many(self):
    """
    Request more particles than the file holds in a single call.

    2000 particles are requested from the parser and whatever comes back
    is compared with the expected yml file, first for the recovered
    configuration and then for the telemetered configuration.
    """
    input_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

    # (parser configuration, yml file holding the expected particles)
    passes = (
        (self._recov_config, '11079419_PPB_OCR_recov.yml'),
        (self._telem_config, '11079419_PPB_OCR_telem.yml'),
    )

    for parser_config, expected_yml in passes:
        with open(input_path, 'r') as file_handle:
            parser = SpkirAbjCsppParser(parser_config,
                                        file_handle,
                                        self.exception_callback)

            # Per the original author's note the file has only 1623 data
            # records, so asking for 2000 should yield 1624 particles
            # including the metadata particle.
            particles = parser.get_records(2000)

            log.debug("*** test_get_many Num particles %s", len(particles))

            self.assert_particles(particles, expected_yml, RESOURCE_PATH)
def _build_parser(self, parser_state, stream_in, data_key):
    """
    Build and return the parser for the given data key.

    @param parser_state state handed to the parser constructor
    @param stream_in stream handed to the parser constructor
    @param data_key key used to look up the parser configuration and to
        choose between the telemetered and recovered particle classes
    @return the newly created SpkirAbjCsppParser
    @throws ConfigurationException if no configuration exists for
        data_key, or if data_key is neither the telemetered nor the
        recovered key
    """
    config = self._parser_config.get(data_key)

    if config is None:
        log.warn(
            'Parser config does not exist for key = %s.  Not building parser',
            data_key)
        raise ConfigurationException

    # Choose the particle classes that match the kind of data.
    if data_key == DataTypeKey.SPKIR_ABJ_CSPP_TELEMETERED:
        metadata_class = SpkirAbjCsppMetadataTelemeteredDataParticle
        instrument_class = SpkirAbjCsppInstrumentTelemeteredDataParticle
    elif data_key == DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED:
        metadata_class = SpkirAbjCsppMetadataRecoveredDataParticle
        instrument_class = SpkirAbjCsppInstrumentRecoveredDataParticle
    else:
        log.warn('Invalid Data_Key %s.  Not building parser', data_key)
        raise ConfigurationException

    # The dictionary fetched from self._parser_config is updated in place.
    config.update({
        DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.spkir_abj_cspp',
        DataSetDriverConfigKeys.PARTICLE_CLASS: None,
        DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: {
            METADATA_PARTICLE_CLASS_KEY: metadata_class,
            DATA_PARTICLE_CLASS_KEY: instrument_class,
        }
    })

    log.debug("_build_parser Config: %s", config)

    def save_state(state, ingested):
        # Forward the parser's state to the driver together with the key
        # this parser was built for.
        return self._save_parser_state(state, data_key, ingested)

    return SpkirAbjCsppParser(
        config,
        parser_state,
        stream_in,
        save_state,
        self._data_callback,
        self._sample_exception_callback)
def _build_parser(self, stream_handle):
    """
    Create the parser for the given stream using the recovered metadata
    and instrument particle classes.

    @param stream_handle stream handed to the parser constructor
    @return the newly created SpkirAbjCsppParser
    """
    particle_classes = {
        METADATA_PARTICLE_CLASS_KEY: SpkirAbjCsppMetadataRecoveredDataParticle,
        DATA_PARTICLE_CLASS_KEY: SpkirAbjCsppInstrumentRecoveredDataParticle,
    }

    return SpkirAbjCsppParser(
        {
            DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.spkir_abj_cspp',
            DataSetDriverConfigKeys.PARTICLE_CLASS: None,
            DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: particle_classes,
        },
        stream_handle,
        self._exception_callback)
def test_set_state(self):
    """
    Test changing to a new state after initializing the parser and
    reading data, as if new data has been found and the state has changed.
    """
    file_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

    # The context manager guarantees the file is closed even when one of
    # the assertions below fails.
    with open(file_path, 'r') as stream_handle:
        # Expected particles. NOTE(review): the previous comment named
        # '11079419_PPB_OCR_20.yml' (metadata plus the first 19 instrument
        # particles); the file actually loaded is the one below - confirm
        # it has the same content.
        expected_results = self.get_dict_from_yml('11079419_PPB_OCR_recov.yml')

        parser = SpkirAbjCsppParser(
            self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        particles = parser.get_records(2)

        log.debug("Num particles: %s", len(particles))

        self.assertEqual(len(particles), 2)

        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i], particle)

        # Jump ahead in the file. NOTE(review): the previous comment said
        # position 3656 is the start of the 18th data record, but the
        # position used is 3769 - confirm which value is intended.
        new_state = {
            StateKey.POSITION: 3769,
            StateKey.METADATA_EXTRACTED: True
        }

        parser.set_state(new_state)

        particles = parser.get_records(2)

        self.assertEqual(len(particles), 2)

        # offset in the expected results
        offset = 18
        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i + offset], particle)
def test_extra_data(self):
    """
    Ensure that data records carrying extra values are skipped.

    The first 2 data records in the input file are corrupted by additional
    data values separated by tabs and are expected to be ignored, so the
    first 2 particles should be the metadata particle and the instrument
    particle from the data record after the corrupted ones.
    """
    file_path = os.path.join(RESOURCE_PATH, '11079364_EXTRA_DATA_PPD_OCR.txt')

    # The context manager guarantees the file is closed even when one of
    # the assertions below fails.
    with open(file_path, 'rb') as stream_handle:
        log.info(self.exception_callback_value)

        parser = SpkirAbjCsppParser(
            self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        particles = parser.get_records(2)

        # the corrupted records must have been reported to the callback
        self.assertIsNotNone(self.exception_callback_value)
        self.assertIsInstance(self.exception_callback_value,
                              RecoverableSampleException)

        # expect to see a recoverable sample exception in the log
        log.debug('TEST EXTRA DATA exception call back is %s',
                  self.exception_callback_value)

        expected_results = self.get_dict_from_yml('extra_data_values.yml')

        self.assertEqual(len(particles), 2)

        # since the first two records were corrupted the first records
        # received should be the metadata particle with the timestamp of
        # the 3rd data row and the instrument particle from the 3rd row
        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i], particle)
def create_yml(self):
    """
    This utility creates a yml file from the first particles (up to 20)
    parsed out of '11079419_PPB_OCR.txt' using the recovered
    configuration.
    """
    # The context manager guarantees the file is closed even if parsing
    # or writing the yml file raises.
    with open(os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt'), 'r') as stream_handle:
        parser = SpkirAbjCsppParser(
            self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        particles = parser.get_records(20)

        self.particle_to_yml(particles, '11079419_PPB_OCR_recov.yml')
def test_mid_state_start(self):
    """
    This test makes sure that we retrieve the correct particles upon
    starting with an offset state.
    """
    file_path = os.path.join(RESOURCE_PATH, '11079419_PPB_OCR.txt')

    # The context manager guarantees the file is closed even when one of
    # the assertions below fails.
    with open(file_path, 'rb') as stream_handle:
        # position 1410 is the end of the first data record, which would
        # have produced the metadata particle and the first instrument
        # particle
        initial_state = {
            StateKey.POSITION: 1410,
            StateKey.METADATA_EXTRACTED: True
        }

        parser = SpkirAbjCsppParser(
            self.config.get(DataTypeKey.SPKIR_ABJ_CSPP_RECOVERED),
            initial_state,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        # expect to get the 2nd and 3rd instrument particles next
        particles = parser.get_records(2)

        log.debug("Num particles: %s", len(particles))

        self.assertEqual(len(particles), 2)

        expected_results = self.get_dict_from_yml('mid_state_start.yml')

        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i], particle)

        # Expected parser state after the two reads, with the metadata
        # already sent. NOTE(review): the previous comment called this the
        # end of the 4th data record although only the 2nd and 3rd
        # instrument particles are read - confirm which record 1704 ends.
        the_new_state = {
            StateKey.POSITION: 1704,
            StateKey.METADATA_EXTRACTED: True
        }

        log.debug("********** expected state: %s", the_new_state)
        log.debug("******** new parser state: %s", parser._state)

        # assertEqual reports the differing keys on failure
        self.assertEqual(parser._state, the_new_state)