def test_simple(self):
    """
    Read test data and pull out 20 data particles.
    Assert that the results are those we expected.

    Expected values come from '11079364_PPB_CTD_recov.yml' and are compared
    with the parsed particles index by index.
    """
    file_path = os.path.join(RESOURCE_PATH, RECOVERED_SAMPLE_DATA)

    # The context manager closes the file even when the parser raises or an
    # assertion fails part-way through the test.
    with open(file_path, 'r') as stream_handle:
        # Note: since the recovered and telemetered parser and particles are common
        # to each other, testing one is sufficient, will be completely tested
        # in driver tests
        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            None, stream_handle,
            self.state_callback, self.pub_callback, self.exception_callback)

        particles = parser.get_records(20)

        log.debug("*** test_simple Num particles %s", len(particles))

        # load a dictionary from the yml file
        test_data = self.get_dict_from_yml('11079364_PPB_CTD_recov.yml')

        # check all the values against expected results.
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)
def test_simple(self):
    """
    Read test data and pull out 20 data particles.
    Assert that the results are those we expected.

    Each parsed particle is checked against the entry at the same index in
    the 'data' list of '11079364_PPB_CTD_recov.yml'.
    """
    file_path = os.path.join(RESOURCE_PATH, RECOVERED_SAMPLE_DATA)

    # Use a context manager so the handle is released on every exit path,
    # including a failing assertion.
    with open(file_path, 'r') as stream_handle:
        # Note: since the recovered and telemetered parser and particles are common
        # to each other, testing one is sufficient, will be completely tested
        # in driver tests
        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        particles = parser.get_records(20)

        log.debug("*** test_simple Num particles %s", len(particles))

        # load a dictionary from the yml file
        test_data = self.get_dict_from_yml('11079364_PPB_CTD_recov.yml')

        # check all the values against expected results.
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)
def test_bad_data(self):
    """
    Ensure that bad data is skipped when it exists and a RecoverableSampleException is thrown.
    Note: every other data record has bad data (float instead of int, extra column etc.)

    The test expects exactly 12 exceptions to have been reported through the
    exception callback, each an instance of RecoverableSampleException.
    """
    file_path = os.path.join(RESOURCE_PATH, '11079364_BAD_PPB_CTD.txt')

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rU') as stream_handle:
        log.debug(self.exception_callback_value)

        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            stream_handle,
            self.exception_callback)

        parser.get_records(1)

        log.debug("Exception callback value: %s", self.exception_callback_value)

        self.assertTrue(self.exception_callback_value is not None)

        # assertTrue replaces the deprecated assert_ alias.
        for reported in self.exception_callback_value:
            self.assertTrue(isinstance(reported, RecoverableSampleException))

        self.assertEqual(len(self.exception_callback_value), 12)
def test_get_many(self):
    """
    Read test data and pull out multiple data particles
    Assert that we have the correct number of particles

    Runs once against the recovered sample file (3404 particles requested
    and expected) and once against the telemetered sample file (218).
    """
    # (configuration key, sample file, number of particles requested/expected)
    cases = (
        (DataTypeKey.CTDPF_J_CSPP_RECOVERED, RECOVERED_SAMPLE_DATA, 3404),
        (DataTypeKey.CTDPF_J_CSPP_TELEMETERED, TELEMETERED_SAMPLE_DATA, 218),
    )

    for data_key, sample_file, expected_count in cases:
        file_path = os.path.join(RESOURCE_PATH, sample_file)

        # The context manager closes each file even if the assertion fails.
        with open(file_path, "rU") as stream_handle:
            parser = CtdpfJCsppParser(
                self.config.get(data_key), stream_handle, self.exception_callback
            )

            particles = parser.get_records(expected_count)

            log.debug("*** test_get_many Num particles %s", len(particles))

            self.assertEqual(len(particles), expected_count)
def test_bad_data(self):
    """
    Ensure that bad data is skipped when it exists and a RecoverableSampleException is thrown.
    Note: every other data record has bad data (float instead of int, extra column etc.)

    After asking the parser for one record, the exception callback value
    must hold exactly 12 RecoverableSampleException instances.
    """
    file_path = os.path.join(RESOURCE_PATH, "11079364_BAD_PPB_CTD.txt")

    # Closing via the context manager avoids leaking the handle when an
    # assertion fails before the end of the test.
    with open(file_path, "rU") as stream_handle:
        log.debug(self.exception_callback_value)

        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED), stream_handle, self.exception_callback
        )

        parser.get_records(1)

        log.debug("Exception callback value: %s", self.exception_callback_value)

        self.assertTrue(self.exception_callback_value is not None)

        # assertTrue replaces the deprecated assert_ alias.
        for reported in self.exception_callback_value:
            self.assertTrue(isinstance(reported, RecoverableSampleException))

        self.assertEqual(len(self.exception_callback_value), 12)
def create_yml(self):
    """
    This utility creates a yml file

    Parses the first 20 records of the telemetered sample file and hands
    them to self.particle_to_yml with the name '11079364_PPD_CTD_telem.yml'.
    """
    sample_path = os.path.join(RESOURCE_PATH, TELEMETERED_SAMPLE_DATA)

    # The context manager closes the file even if parsing or writing fails.
    with open(sample_path, 'rU') as stream_handle:
        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_TELEMETERED),
            stream_handle,
            self.exception_callback)

        particles = parser.get_records(20)

        self.particle_to_yml(particles, '11079364_PPD_CTD_telem.yml')
def create_yml(self):
    """
    This utility creates a yml file

    It reads 20 records from the telemetered sample file and passes them to
    self.particle_to_yml under the name "11079364_PPD_CTD_telem.yml".
    """
    sample_path = os.path.join(RESOURCE_PATH, TELEMETERED_SAMPLE_DATA)

    # Use a context manager so the handle is not leaked when the parser or
    # the yml writer raises.
    with open(sample_path, "rU") as stream_handle:
        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_TELEMETERED), stream_handle, self.exception_callback
        )

        particles = parser.get_records(20)

        self.particle_to_yml(particles, "11079364_PPD_CTD_telem.yml")
def test_simple(self):
    """
    Read test data and pull out 20 data particles.
    Assert that the results are those we expected.

    Both the recovered and the telemetered sample files are parsed, and each
    result is compared with its own yml file via self.assert_particles.
    """
    # (configuration key, sample file, yml file holding the expected particles)
    cases = (
        (DataTypeKey.CTDPF_J_CSPP_RECOVERED, RECOVERED_SAMPLE_DATA, "11079364_PPB_CTD_recov.yml"),
        (DataTypeKey.CTDPF_J_CSPP_TELEMETERED, TELEMETERED_SAMPLE_DATA, "11079364_PPD_CTD_telem.yml"),
    )

    for data_key, sample_file, expected_yml in cases:
        file_path = os.path.join(RESOURCE_PATH, sample_file)

        # The context manager closes each file even if the comparison fails.
        with open(file_path, "rU") as stream_handle:
            parser = CtdpfJCsppParser(
                self.config.get(data_key), stream_handle, self.exception_callback
            )

            particles = parser.get_records(20)

            log.debug("*** test_simple Num particles %s", len(particles))

            self.assert_particles(particles, expected_yml, RESOURCE_PATH)
def test_mid_state_start(self):
    """
    This test makes sure that we retrieve the correct particles upon starting with an offset state.

    The parser is created with a state at position 309 and metadata marked
    as extracted. Two records are read and compared with
    'mid_state_start.yml', then the parser state is checked.
    """
    file_path = os.path.join(RESOURCE_PATH, RECOVERED_SAMPLE_DATA)

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        # position 309 is the beginning of the second data record, which would have produced the
        # metadata particle and the first instrument particle
        initial_state = {
            StateKey.POSITION: 309,
            StateKey.METADATA_EXTRACTED: True
        }

        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            initial_state,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        # expect to get the 2nd and 3rd instrument particles next
        particles = parser.get_records(2)

        log.debug("Num particles: %s", len(particles))

        # assertEqual reports both values when the check fails.
        self.assertEqual(len(particles), 2)

        expected_results = self.get_dict_from_yml('mid_state_start.yml')

        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i], particle)

        # after reading two records the state is expected to be at position
        # 403 with the metadata still marked as extracted
        the_new_state = {
            StateKey.POSITION: 403,
            StateKey.METADATA_EXTRACTED: True
        }

        log.debug("********** expected state: %s", the_new_state)
        log.debug("******** new parser state: %s", parser._state)

        self.assertEqual(parser._state, the_new_state)
def test_set_state(self):
    """
    Test changing to a new state after initializing the parser and
    reading data, as if new data has been found and the state has
    changed

    Two records are read from the start of the file, the state is then moved
    to position 1061, and the next two records are compared with the
    expected results starting at index 18.
    """
    file_path = os.path.join(RESOURCE_PATH, RECOVERED_SAMPLE_DATA)

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'r') as stream_handle:
        # The yml file has the metadata and the first 19
        # instrument particles in it
        expected_results = self.get_dict_from_yml('11079364_PPB_CTD_recov.yml')

        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            None, stream_handle,
            self.state_callback, self.pub_callback, self.exception_callback)

        particles = parser.get_records(2)

        log.debug("Num particles: %s", len(particles))

        # assertEqual reports both values when the check fails.
        self.assertEqual(len(particles), 2)

        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i], particle)

        # position 1061 is the byte at the start of the 18th data record
        new_state = {StateKey.POSITION: 1061, StateKey.METADATA_EXTRACTED: True}

        parser.set_state(new_state)

        particles = parser.get_records(2)

        self.assertEqual(len(particles), 2)

        # offset in the expected results, into the 18th result
        offset = 18
        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i + offset], particle)
def test_get_many(self):
    """
    Read test data and pull out multiple data particles
    Assert that we have the correct number of particles

    The recovered sample file must yield 3404 particles and the telemetered
    sample file 218.
    """
    # (configuration key, sample file, number of particles requested/expected)
    cases = (
        (DataTypeKey.CTDPF_J_CSPP_RECOVERED, RECOVERED_SAMPLE_DATA, 3404),
        (DataTypeKey.CTDPF_J_CSPP_TELEMETERED, TELEMETERED_SAMPLE_DATA, 218),
    )

    for data_key, sample_file, expected_count in cases:
        file_path = os.path.join(RESOURCE_PATH, sample_file)

        # Use a context manager so a failing assertion cannot leak the handle.
        with open(file_path, 'rU') as stream_handle:
            parser = CtdpfJCsppParser(
                self.config.get(data_key),
                stream_handle,
                self.exception_callback)

            particles = parser.get_records(expected_count)

            log.debug("*** test_get_many Num particles %s", len(particles))

            self.assertEqual(len(particles), expected_count)
def _build_parser(self, parser_state, infile, data_key=None):
    """
    Build and return the parser

    The configuration stored under data_key in self._parser_config is
    updated in place with the particle module and the metadata/instrument
    particle classes that match the key. A CtdpfJCsppParser is then created
    whose state callback forwards to self._save_parser_state together with
    data_key.

    Raises ConfigurationException when data_key is neither the recovered
    nor the telemetered key.
    """
    config = self._parser_config.get(data_key)

    # Select the particle classes for the requested data type; any other key
    # is rejected before a parser is built.
    if data_key == DataTypeKey.CTDPF_J_CSPP_RECOVERED:
        particle_classes = {
            METADATA_PARTICLE_CLASS_KEY: CtdpfJCsppMetadataRecoveredDataParticle,
            DATA_PARTICLE_CLASS_KEY: CtdpfJCsppInstrumentRecoveredDataParticle
        }
    elif data_key == DataTypeKey.CTDPF_J_CSPP_TELEMETERED:
        particle_classes = {
            METADATA_PARTICLE_CLASS_KEY: CtdpfJCsppMetadataTelemeteredDataParticle,
            DATA_PARTICLE_CLASS_KEY: CtdpfJCsppInstrumentTelemeteredDataParticle
        }
    else:
        raise ConfigurationException(
            "Invalid data_key (%s) supplied to build parser" % data_key)

    config.update({
        DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.ctdpf_j_cspp',
        DataSetDriverConfigKeys.PARTICLE_CLASS: None,
        DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: particle_classes
    })

    def save_state(state, ingested):
        # Bind data_key so the saved state is filed under the right key.
        return self._save_parser_state(state, data_key, ingested)

    return CtdpfJCsppParser(
        config,
        parser_state,
        infile,
        save_state,
        self._data_callback,
        self._sample_exception_callback)
def _build_parser(self, stream_handle):
    """
    Build and return a CtdpfJCsppParser reading from stream_handle.

    The parser is configured with the telemetered metadata and instrument
    particle classes and reports exceptions to self._exception_callback.
    """
    particle_classes = {
        METADATA_PARTICLE_CLASS_KEY: CtdpfJCsppMetadataTelemeteredDataParticle,
        DATA_PARTICLE_CLASS_KEY: CtdpfJCsppInstrumentTelemeteredDataParticle
    }

    return CtdpfJCsppParser(
        {
            DataSetDriverConfigKeys.PARTICLE_CLASS: None,
            DataSetDriverConfigKeys.PARTICLE_CLASSES_DICT: particle_classes
        },
        stream_handle,
        self._exception_callback)
def test_get_many(self):
    """
    Read test data and pull out multiple data particles
    Assert that we have the correct number of particles

    Requests 2000 particles from the recovered sample file and checks that
    exactly 2000 are returned.
    """
    file_path = os.path.join(RESOURCE_PATH, RECOVERED_SAMPLE_DATA)

    # The context manager closes the file even if the assertion fails.
    with open(file_path, 'r') as stream_handle:
        # Note: since the recovered and telemetered parser and particles are common
        # to each other, testing one is sufficient, will be completely tested
        # in driver tests
        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            None, stream_handle,
            self.state_callback, self.pub_callback, self.exception_callback)

        # try to get 2000 particles, 1999 data records plus one meta data
        particles = parser.get_records(2000)

        log.debug("*** test_get_many Num particles %s", len(particles))

        self.assertEqual(len(particles), 2000)
def test_mid_state_start(self):
    """
    This test makes sure that we retrieve the correct particles upon starting with an offset state.

    Starting from position 309 with metadata marked as extracted, two records
    are read and checked against 'mid_state_start.yml'; the resulting parser
    state is then compared with the expected one.
    """
    file_path = os.path.join(RESOURCE_PATH, RECOVERED_SAMPLE_DATA)

    # Use a context manager so the handle is released on every exit path.
    with open(file_path, 'rb') as stream_handle:
        # position 309 is the beginning of the second data record, which would have produced the
        # metadata particle and the first instrument particle
        initial_state = {StateKey.POSITION: 309, StateKey.METADATA_EXTRACTED: True}

        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            initial_state, stream_handle,
            self.state_callback, self.pub_callback, self.exception_callback)

        # expect to get the 2nd and 3rd instrument particles next
        particles = parser.get_records(2)

        log.debug("Num particles: %s", len(particles))

        # assertEqual reports both values when the check fails.
        self.assertEqual(len(particles), 2)

        expected_results = self.get_dict_from_yml('mid_state_start.yml')

        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i], particle)

        # after reading two records the state is expected to be at position
        # 403 with the metadata still marked as extracted
        the_new_state = {StateKey.POSITION: 403, StateKey.METADATA_EXTRACTED: True}

        log.debug("********** expected state: %s", the_new_state)
        log.debug("******** new parser state: %s", parser._state)

        self.assertEqual(parser._state, the_new_state)
def test_get_many(self):
    """
    Read test data and pull out multiple data particles
    Assert that we have the correct number of particles

    The recovered sample file is asked for 2000 particles and the test
    passes only when 2000 come back.
    """
    file_path = os.path.join(RESOURCE_PATH, RECOVERED_SAMPLE_DATA)

    # Use a context manager so a failing assertion cannot leak the handle.
    with open(file_path, 'r') as stream_handle:
        # Note: since the recovered and telemetered parser and particles are common
        # to each other, testing one is sufficient, will be completely tested
        # in driver tests
        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        # try to get 2000 particles, 1999 data records plus one meta data
        particles = parser.get_records(2000)

        log.debug("*** test_get_many Num particles %s", len(particles))

        self.assertEqual(len(particles), 2000)
def test_set_state(self):
    """
    Test changing to a new state after initializing the parser and
    reading data, as if new data has been found and the state has
    changed

    The first two records are checked against the start of the expected
    results. After set_state moves the parser to position 1061, the next two
    records are checked against the expected results from index 18 onward.
    """
    file_path = os.path.join(RESOURCE_PATH, RECOVERED_SAMPLE_DATA)

    # Use a context manager so the handle is released on every exit path.
    with open(file_path, 'r') as stream_handle:
        # The yml file has the metadata and the first 19
        # instrument particles in it
        expected_results = self.get_dict_from_yml('11079364_PPB_CTD_recov.yml')

        parser = CtdpfJCsppParser(
            self.config.get(DataTypeKey.CTDPF_J_CSPP_RECOVERED),
            None,
            stream_handle,
            self.state_callback,
            self.pub_callback,
            self.exception_callback)

        particles = parser.get_records(2)

        log.debug("Num particles: %s", len(particles))

        # assertEqual reports both values when the check fails.
        self.assertEqual(len(particles), 2)

        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i], particle)

        # position 1061 is the byte at the start of the 18th data record
        new_state = {
            StateKey.POSITION: 1061,
            StateKey.METADATA_EXTRACTED: True
        }

        parser.set_state(new_state)

        particles = parser.get_records(2)

        self.assertEqual(len(particles), 2)

        # offset in the expected results, into the 18th result
        offset = 18
        for i, particle in enumerate(particles):
            self.assert_result(expected_results['data'][i + offset], particle)
def test_simple(self):
    """
    Read test data and pull out 20 data particles.
    Assert that the results are those we expected.

    The recovered and the telemetered sample files are both parsed, and each
    set of particles is compared with its yml file via self.assert_particles.
    """
    # (configuration key, sample file, yml file holding the expected particles)
    cases = (
        (DataTypeKey.CTDPF_J_CSPP_RECOVERED, RECOVERED_SAMPLE_DATA, '11079364_PPB_CTD_recov.yml'),
        (DataTypeKey.CTDPF_J_CSPP_TELEMETERED, TELEMETERED_SAMPLE_DATA, '11079364_PPD_CTD_telem.yml'),
    )

    for data_key, sample_file, expected_yml in cases:
        file_path = os.path.join(RESOURCE_PATH, sample_file)

        # Use a context manager so a failing comparison cannot leak the handle.
        with open(file_path, 'rU') as stream_handle:
            parser = CtdpfJCsppParser(
                self.config.get(data_key),
                stream_handle,
                self.exception_callback)

            particles = parser.get_records(20)

            log.debug("*** test_simple Num particles %s", len(particles))

            self.assert_particles(particles, expected_yml, RESOURCE_PATH)