def _build_parser(self, parser_state, file_handle, data_key=None):
    # configure the parser based on the data_key
    if data_key == DataTypeKey.ADCPA_INSTRUMENT:
        config = self._parser_config.get(data_key)
        config.update({
            DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.adcpa_m_glider',
            DataSetDriverConfigKeys.PARTICLE_CLASS: 'AdcpaMGliderInstrumentParticle'
        })
    elif data_key == DataTypeKey.ADCPA_RECOVERED:
        config = self._parser_config.get(data_key)
        config.update({
            DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.adcpa_m_glider',
            DataSetDriverConfigKeys.PARTICLE_CLASS: 'AdcpaMGliderRecoveredParticle'
        })
    else:
        # if we don't get a valid data_key raise an exception
        log.warn('Parser got bad configuration DataTypeKey')
        raise ConfigurationException

    parser = AdcpPd0Parser(
        config, parser_state, file_handle,
        lambda state, ingested: self._save_parser_state(state, data_key, ingested),
        self._data_callback,
        self._sample_exception_callback)

    return parser
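For reference, a minimal sketch of the merged parser config the builder above produces for the recovered key, inferred from the update() call; any pre-existing per-key entries in the driver's _parser_config would also still be present:

merged_config = {
    DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.adcpa_m_glider',
    DataSetDriverConfigKeys.PARTICLE_CLASS: 'AdcpaMGliderRecoveredParticle',
    # ...plus whatever entries self._parser_config already held for this key
}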
def test_bad_data(self):
    """
    Ensure that bad data is skipped when it exists.
    """
    # LB180210_3_corrupted.PD0 has three records in it; the 2nd record is corrupted
    fid = open(os.path.join(RESOURCE_PATH, 'LB180210_3_corrupted.PD0'), 'rb')
    self.stream_handle = fid
    self.parser = AdcpPd0Parser(self.config_recov, None, self.stream_handle,
                                self.state_callback, self.publish_callback,
                                self.exception_callback)

    # try to get 3 particles; we should only get 2 back and
    # the second one should correspond to ensemble 3
    self.parser.get_records(3)

    log.debug('Exceptions : %s', self.exception_callback_value)

    self.assert_(isinstance(self.exception_callback_value[0], UnexpectedDataException))

    fid.close()
def parse(basePythonCodePath, sourceFilePath, particleDataHdlrObj):
    from mi.logging import config
    config.add_configuration(os.path.join(basePythonCodePath,
                                          'res', 'config', 'mi-logging.yml'))

    log = get_logger()

    config = {
        DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.adcps_jln',
        DataSetDriverConfigKeys.PARTICLE_CLASS: 'AdcpsJlnParticle'
    }
    log.trace("My ADCPS JLN Config: %s", config)

    def exception_callback(exception):
        log.debug("ERROR: %r", exception)
        particleDataHdlrObj.setParticleDataCaptureFailure()

    with open(sourceFilePath, 'rb') as file_handle:
        parser = AdcpPd0Parser(config, None, file_handle,
                               lambda state, ingested: None,
                               lambda data: None,
                               exception_callback)

        driver = DataSetDriver(parser, particleDataHdlrObj)
        driver.processFileStream()

    return particleDataHdlrObj
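A minimal sketch of how this parse() entry point is typically invoked; the ParticleDataHandler import location follows the usual mi-dataset layout but is an assumption here, and the paths are placeholders:

from mi.dataset.dataset_driver import ParticleDataHandler  # assumed import location

# the MI code tree path and source PD0 file path below are placeholders
particle_data_handler = parse('/path/to/mi', '/path/to/adcps_jln.pd0',
                              ParticleDataHandler())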
def test_simple(self):
    """
    Read test data and pull out data particles one at a time.
    Assert that the results are those we expected.
    """
    # LA101636.PD0 was attached to the IDD and used to verify earlier
    # versions of this parser
    fid = open(os.path.join(RESOURCE_PATH, 'LA101636.PD0'), 'rb')
    self.stream_handle = fid
    self.parser = AdcpPd0Parser(self.config, self.start_state, self.stream_handle,
                                self.state_callback, self.pub_callback,
                                self.exception_callback)

    particles = self.parser.get_records(1)

    log.debug('got back %d particles', len(particles))

    self.assert_result(self.test01, particles[0])

    fid.close()
def test_simple_telem(self):
    """
    Read test data and pull out data particles one at a time.
    Assert that the results are those we expected.
    """
    # ND072022.PD0 contains a single ADCPA ensemble
    fid = open(os.path.join(RESOURCE_PATH, 'ND072022.PD0'), 'rb')
    self.stream_handle = fid
    self.parser = AdcpPd0Parser(self.config_telem, None, self.stream_handle,
                                self.state_callback, self.publish_callback,
                                self.exception_callback)

    particles = self.parser.get_records(1)

    log.debug('got back %d particles', len(particles))

    self.assert_particles(particles, 'ND072022_telem.yml', RESOURCE_PATH)

    fid.close()
def test_simple(self):
    """
    Read test data and pull out data particles one at a time.
    Assert that the results are those we expected.
    The expected results for ADCP_data_20130702.000 come from the IDD.
    The results for that record were manually verified, and the entire
    parsed particle is represented in ADCP_data_20130702.yml.
    """
    # ADCP_data_20130702.000 has one record in it
    fid = open(os.path.join(RESOURCE_PATH, 'ADCP_data_20130702.000'), 'rb')
    self.stream_handle = fid
    self.parser = AdcpPd0Parser(self.config, self.start_state, self.stream_handle,
                                self.state_callback, self.pub_callback,
                                self.exception_callback)

    particles = self.parser.get_records(1)

    # this simple test shows the 2 ways to verify results
    self.assert_result(self.test01, particles[0])

    test_data = self.get_dict_from_yml('ADCP_data_20130702.yml')
    self.assert_result(test_data['data'][0], particles[0])

    # close the file
    fid.close()
def parse(basePythonCodePath, sourceFilePath, particleDataHdlrObj):
    from mi.logging import config
    config.add_configuration(os.path.join(basePythonCodePath,
                                          'res', 'config', 'mi-logging.yml'))

    log = get_logger()

    config = {
        DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: {
            'velocity': 'VelocityEarth',
            'engineering': 'AdcpsEngineering',
            'config': 'AdcpsConfig',
            'bottom_track': 'EarthBottom',
            'bottom_track_config': 'BottomConfig',
        }
    }
    log.trace("My ADCPS JLN Config: %s", config)

    def exception_callback(exception):
        log.error("ERROR: %r", exception)
        particleDataHdlrObj.setParticleDataCaptureFailure()

    with open(sourceFilePath, 'rb') as file_handle:
        parser = AdcpPd0Parser(config, file_handle, exception_callback)

        driver = DataSetDriver(parser, particleDataHdlrObj)
        driver.processFileStream()

    return particleDataHdlrObj
def _build_parser(self, stream_handle):
    config = {
        DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.adcpa_n',
        DataSetDriverConfigKeys.PARTICLE_CLASS: 'AdcpaNInstrumentParticle'
    }

    parser = AdcpPd0Parser(config, stream_handle, self._exception_callback)
    return parser
def test_long_stream(self):
    with open(os.path.join(RESOURCE_PATH, 'adcp.adc'), 'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config, stream_handle, self.exception_callback)

        particles = parser.get_records(5000)  # ask for 5000, should get 4132

        log.debug('got back %d particles', len(particles))

        self.assertEqual(len(particles), 4132)
        self.assertEqual(self.exception_callback_value, [])
def test_bad_data(self):
    """
    Ensure that bad data is skipped when it exists.
    """
    # ADCP_data_Corrupted.000 has one bad record followed by one good record
    with open(os.path.join(RESOURCE_PATH, 'ADCP_data_Corrupted.000'), 'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config, stream_handle, self.exception_callback)

        particles = parser.get_records(1)

        self.assert_result(self.test01, particles[0])
def _build_parser(self, stream_handle):
    config = {
        DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: {
            'velocity': 'VelocityInst',
            'engineering': 'AuvEngineering',
            'config': 'AuvConfig',
            'bottom_track': 'InstBottom',
            'bottom_track_config': 'BottomConfig',
        }
    }

    parser = AdcpPd0Parser(config, stream_handle, self._exception_callback)
    return parser
def test_set_state(self):
    """
    Test changing to a new state after initializing the parser and
    reading data, as if new data has been found and the state has changed.
    """
    # ADCP_CCE1T_20.000 has 20 records in it
    fid = open(os.path.join(RESOURCE_PATH, 'ADCP_CCE1T_20.000'), 'rb')
    self.stream_handle = fid

    new_state = {StateKey.POSITION: 100}
    # ensembles in this file are 1254 bytes long;
    # the first record found should be number 2 at byte 1254
    self.parser = AdcpPd0Parser(self.config, new_state, self.stream_handle,
                                self.state_callback, self.pub_callback,
                                self.exception_callback)

    particles = self.parser.get_records(1)  # just get 1 record
    self.assert_result(self.test05, particles[0])

    new_state = {StateKey.POSITION: 6000}
    # ensembles in this file are 1254 bytes long;
    # the first record found should be number 6 at byte 6270
    self.parser = AdcpPd0Parser(self.config, new_state, self.stream_handle,
                                self.state_callback, self.pub_callback,
                                self.exception_callback)

    particles = self.parser.get_records(1)  # just get 1 record
    self.assert_result(self.test04, particles[0])

    fid.close()
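The byte arithmetic behind the state comments above, written out as a small sketch; the 1254-byte ensemble size comes from the comments in the test, not from independent inspection of the file:

ENSEMBLE_SIZE = 1254                         # per the comments above
position = 6000                              # resumed state position
next_index = -(-position // ENSEMBLE_SIZE)   # ceil(6000 / 1254) = 5
next_start = next_index * ENSEMBLE_SIZE      # 6270, the start of ensemble 6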
def process(self):
    with open(self._source_file_path, "rb") as file_handle:
        def exception_callback(exception):
            log.debug("Exception: %s", exception)
            self._particle_data_handler.setParticleDataCaptureFailure()

        parser = AdcpPd0Parser(self._parser_config, file_handle, exception_callback)

        driver = DataSetDriver(parser, self._particle_data_handler)
        driver.processFileStream()

    return self._particle_data_handler
def test_get_many(self):
    with open(os.path.join(RESOURCE_PATH, 'adcp_auv_51.pd0'), 'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config, stream_handle, self.exception_callback)

        particles = parser.get_records(51)
        log.info('got back %d particles', len(particles))

        # Note: the yaml file was generated but hand-checked against the
        # output of the vendor-supplied MATLAB tool
        self.assert_particles(particles, 'adcp_auv_51.yml', RESOURCE_PATH)
        self.assertEqual(len(self.exception_callback_value), 0)
def test_long_stream(self):
    """
    Verify an entire file against a yaml result file.
    """
    with open(os.path.join(RESOURCE_PATH, 'ADCP_CCE1T_20.000'), 'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config, stream_handle, self.exception_callback)

        particles = parser.get_records(47)

        self.assert_particles(particles, 'ADCP_CCE1T_20.yml', RESOURCE_PATH)
        self.assertEqual(self.exception_callback_value, [])
def test_with_status_data(self):
    """
    Verify the parser will work with a file that contains the status data block.
    This was found during integration testing with real recovered data.
    """
    with open(os.path.join(RESOURCE_PATH, 'ND161646.PD0'), 'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config_recov, stream_handle, self.exception_callback)

        particles = parser.get_records(250)
        log.debug('got back %d records', len(particles))

        self.assert_particles(particles, 'ND161646.yml', RESOURCE_PATH)
def create_yml(self):
    """
    This utility creates a yml file.
    """
    fid = open(os.path.join(RESOURCE_PATH, 'NE051400.PD0'), 'rb')

    self.stream_handle = fid
    self.parser = AdcpPd0Parser(self.config_recov, None, self.stream_handle,
                                self.state_callback, self.publish_callback,
                                self.exception_callback)

    particles = self.parser.get_records(250)

    self.particle_to_yml(particles, 'NE051400.yml')
    fid.close()
def test_get_many(self):
    """
    Read test data and pull out multiple data particles at one time.
    Assert that the results are those we expected.
    """
    with open(os.path.join(RESOURCE_PATH, 'ND072023.PD0'), 'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config_recov, stream_handle, self.exception_callback)

        particles = parser.get_records(54)
        log.debug('got back %d records', len(particles))

        self.assert_particles(particles, 'ND072023_recov.yml', RESOURCE_PATH)
def test_get_many(self):
    """
    Read test data and pull out multiple data particles at one time.
    Assert that the results are those we expected.
    """
    # ADCP_CCE1T_20.000 has 20 records in it
    with open(os.path.join(RESOURCE_PATH, 'ADCP_CCE1T_20.000'), 'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config, stream_handle, self.exception_callback)

        particles = parser.get_records(50)

        self.assert_result(self.test02, particles[0])
        self.assert_result(self.test03, particles[43])
def process(self):
    with open(self._sourceFilePath, "rb") as file_handle:
        def exception_callback(exception):
            log.debug("Exception: %s", exception)
            self._particleDataHdlrObj.setParticleDataCaptureFailure()

        parser = AdcpPd0Parser(self._parser_config, None, file_handle,
                               lambda state, ingested: None,
                               lambda data: None,
                               exception_callback)

        driver = DataSetDriver(parser, self._particleDataHdlrObj)
        driver.processFileStream()

    return self._particleDataHdlrObj
def test_bad_data(self):
    """
    Ensure that bad data is skipped when it exists.
    """
    # LB180210_3_corrupted.PD0 has three records in it; the 2nd record is corrupted
    fid = open(os.path.join(RESOURCE_PATH, 'LB180210_3_corrupted.PD0'), 'rb')
    self.stream_handle = fid
    self.parser = AdcpPd0Parser(self.config, self.start_state, self.stream_handle,
                                self.state_callback, self.pub_callback,
                                self.exception_callback)

    # try to get 3 particles; we should only get 2 back and
    # the second one should correspond to ensemble 3
    particles = self.parser.get_records(3)

    self.assert_result(self.test06, particles[1])

    fid.close()
def test_get_many(self):
    """
    Read test data and pull out multiple data particles at one time.
    Assert that the results are those we expected.
    """
    fid = open(os.path.join(RESOURCE_PATH, 'ND072023.PD0'), 'rb')
    self.stream_handle = fid
    self.parser = AdcpPd0Parser(self.config_recov, None, self.stream_handle,
                                self.state_callback, self.publish_callback,
                                self.exception_callback)

    particles = self.parser.get_records(54)
    log.info('got back %d records', len(particles))

    self.assert_particles(particles, 'ND072023_recov.yml', RESOURCE_PATH)

    fid.close()
def test_bad_data(self):
    """
    Ensure that bad data is skipped when it exists.
    """
    # LB180210_3_corrupted.PD0 has three records in it; the 2nd record is corrupted
    with open(os.path.join(RESOURCE_PATH, 'LB180210_3_corrupted.PD0'), 'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config_recov, stream_handle, self.exception_callback)

        # try to get 3 particles; we should only get 2 back and
        # the second one should correspond to ensemble 3
        parser.get_records(3)

        log.debug('Exceptions : %s', self.exception_callback_value[0])

        self.assertEqual(len(self.exception_callback_value), 1)
        self.assert_(isinstance(self.exception_callback_value[0],
                                RecoverableSampleException))
def test_get_many(self):
    """
    Read test data and pull out multiple data particles at one time.
    Assert that the results are those we expected.
    """
    # ADCP_CCE1T_20.000 has 20 records in it
    fid = open(os.path.join(RESOURCE_PATH, 'ADCP_CCE1T_20.000'), 'rb')
    self.stream_handle = fid
    self.parser = AdcpPd0Parser(self.config, self.start_state, self.stream_handle,
                                self.state_callback, self.pub_callback,
                                self.exception_callback)

    particles = self.parser.get_records(20)
    log.info('got back %d records', len(particles))

    self.assert_result(self.test02, particles[0])
    self.assert_result(self.test03, particles[19])

    fid.close()
def test_mid_state_start(self):
    """
    Test starting the parser in a state in the middle of processing.
    """
    # LB180210_50.PD0 has 50 records in it
    fid = open(os.path.join(RESOURCE_PATH, 'LB180210_50.PD0'), 'rb')
    self.stream_handle = fid

    new_state = {StateKey.POSITION: 12042}
    # ensembles in this file are 446 bytes long;
    # the first record found should be number 28 at byte 12042
    self.parser = AdcpPd0Parser(self.config, new_state, self.stream_handle,
                                self.state_callback, self.pub_callback,
                                self.exception_callback)

    particles = self.parser.get_records(5)

    self.assert_result(self.test04, particles[0])

    fid.close()
def test_mid_state_start(self):
    """
    Test starting the parser in a state in the middle of processing.
    """
    # ADCP_CCE1T_20.000 has 20 records in it
    fid = open(os.path.join(RESOURCE_PATH, 'ADCP_CCE1T_20.000'), 'rb')
    self.stream_handle = fid

    new_state = {StateKey.POSITION: 6000}
    # ensembles in this file are 1254 bytes long;
    # the first record found should be number 6 at byte 6270
    self.parser = AdcpPd0Parser(self.config, new_state, self.stream_handle,
                                self.state_callback, self.pub_callback,
                                self.exception_callback)

    particles = self.parser.get_records(5)

    self.assert_result(self.test04, particles[0])

    fid.close()
def _build_parser(self, parser_state, file_handle, data_key=None):
    # configure the parser based on the data_key
    if data_key == DataTypeKey.ADCPS_JLN_STC:
        config = self._parser_config.get(data_key)
        config.update({
            DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.adcps_jln_stc',
            DataSetDriverConfigKeys.PARTICLE_CLASS: 'AdcpsJlnStcInstrumentParserDataParticle'
        })
        parser = AdcpsJlnStcParser(
            config, parser_state, file_handle,
            lambda state, ingested: self._save_parser_state(state, data_key, ingested),
            self._data_callback,
            self._sample_exception_callback)
    elif data_key == DataTypeKey.ADCPS_JLN:
        config = self._parser_config.get(data_key)
        config.update({
            DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.adcps_jln',
            DataSetDriverConfigKeys.PARTICLE_CLASS: 'AdcpsJlnParticle'
        })
        parser = AdcpPd0Parser(
            config, parser_state, file_handle,
            lambda state, ingested: self._save_parser_state(state, data_key, ingested),
            self._data_callback,
            self._sample_exception_callback)
    else:
        # if we don't get a valid data_key raise an exception
        log.warn('Parser got bad configuration DataTypeKey')
        raise ConfigurationException

    return parser
def trim_file(self):
    """
    This utility routine can be used to trim large PD0 files down to a
    more manageable size. It uses the sieve in the parser to create a
    copy of the file with a specified number of records.
    """
    # define these values as needed
    input_file = 'ADCP_CCE1T.000'
    output_file = 'ADCP_CCE1T_21_40.000'
    num_rec = 20
    first_rec = 21

    log.info("opening file")
    infid = open(os.path.join(RESOURCE_PATH, input_file), 'rb')
    in_buffer = infid.read()
    log.info("file read")

    stream_handle = infid  # parser needs a stream handle even though it won't use it
    parser = AdcpPd0Parser(self.config, self.start_state, stream_handle,
                           self.state_callback, self.pub_callback,
                           self.exception_callback)

    log.info("parser created, calling sieve")
    # get the starts and ends of all the records
    indices = parser.sieve_function(in_buffer)
    log.info("sieve returned %d indices", len(indices))

    if len(indices) < first_rec + num_rec:
        log.info('trim_file: not enough records in file, no output created')
        return

    first_byte = indices[first_rec - 1][0]
    last_byte = indices[first_rec - 1 + num_rec - 1][1]
    log.info('first byte is %d, last_byte is %d', first_byte, last_byte)

    outfid = open(os.path.join(RESOURCE_PATH, output_file), 'wb')
    outfid.write(in_buffer[first_byte:last_byte])
    outfid.close()
    infid.close()
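For context, sieve_function returns a list of (start, end) byte-offset pairs, one per PD0 ensemble found in the buffer, which is why the code above slices in_buffer with indices[...][0] and indices[...][1]. A toy illustration with made-up offsets:

# toy offsets only; real values depend on the file contents
indices = [(0, 1254), (1254, 2508), (2508, 3762)]
second_ensemble = in_buffer[indices[1][0]:indices[1][1]]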
def parse(unused, source_file_path, particle_data_handler):
    config = {
        DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: {
            'velocity': 'VelocityEarth',
            'engineering': 'AdcpsEngineering',
            'config': 'AdcpsConfig',
            'bottom_track': 'EarthBottom',
            'bottom_track_config': 'BottomConfig',
        }
    }
    log.trace("My ADCPS JLN Config: %s", config)

    def exception_callback(exception):
        log.error("ERROR: %r", exception)
        particle_data_handler.setParticleDataCaptureFailure()

    with open(source_file_path, 'rb') as file_handle:
        parser = AdcpPd0Parser(config, file_handle, exception_callback)

        driver = DataSetDriver(parser, particle_data_handler)
        driver.processFileStream()

    return particle_data_handler
def test_bug_10136(self):
    """
    Ensure that bad ensembles are skipped and all valid ensembles are returned.
    """
    with open(os.path.join(RESOURCE_PATH, 'SN_18596_Recovered_Data_RDI_000.000'),
              'rb') as stream_handle:
        parser = AdcpPd0Parser(self.config, stream_handle, self.exception_callback)

        particles = parser.get_records(40000)

        particle_counter = Counter()
        for particle in particles:
            particle_counter[particle._data_particle_type] += 1

        self.assertEqual(particle_counter[AdcpDataParticleType.VELOCITY_EARTH], 13913)

        self.assertTrue(len(self.exception_callback_value) > 0)