def test_update(self):
    """
    Test a file which has had a section of data replaced by 0s, as if a
    block of data has not been received yet, then using the returned state
    make a new parser with the test data that has the 0s filled in
    """
    log.debug('Starting test_update')
    self.state = {StateKey.UNPROCESSED_DATA: [[0, 5000]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.FILE_SIZE: 22000}

    # this file has a block of AD data replaced by 0s
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_replaced.dat'))
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback,
                              self.exception_callback)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[868, 1254, 1, 0], [1444, 1830, 1, 0],
                        [2020, 2406, 1, 0], [2665, 3050, 1, 0],
                        [3240, 3627, 1, 0], [4461, 4847, 1, 0]],
                       [[0, 32], [607, 678], [868, 1254], [1444, 1830],
                        [2020, 2475], [2665, 3050], [3240, 3627],
                        [3817, 4271], [4461, 5000]],
                       self.particle_a)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[1444, 1830, 1, 0], [2020, 2406, 1, 0],
                        [2665, 3050, 1, 0], [3240, 3627, 1, 0],
                        [4461, 4847, 1, 0]],
                       [[0, 32], [607, 678], [1444, 1830], [2020, 2475],
                        [2665, 3050], [3240, 3627], [3817, 4271],
                        [4461, 5000]],
                       self.particle_b)
    self.stream_handle.close()

    # carry the state over to a second parser reading the file where the
    # previously zeroed block of CT data is now filled in
    saved_state = self.parser._state
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    self.parser = AdcpsParser(self.config, saved_state, self.stream_handle,
                              self.state_callback, self.pub_callback,
                              self.exception_callback)

    # first get the old 'in process' records; once those are done, the
    # unprocessed data will be checked
    records = self.parser.get_records(5)
    self.assertEqual(records[0], self.particle_c)
    self.assertEqual(records[1], self.particle_d)
    self.assertEqual(records[2], self.particle_e)
    self.assert_state([],
                      [[0, 32], [607, 678], [2406, 2475],
                       [3817, 4271], [4847, 5000]])

    # this should be the first of the newly filled in particles
    records = self.parser.get_records(1)
    self.assert_result(records, [],
                       [[0, 32], [607, 678], [2406, 2475],
                        [4204, 4271], [4847, 5000]],
                       self.particle_g)
    self.stream_handle.close()
    self.assertEqual(self.exception_callback_value, None)
def test_update(self):
    """
    Test a file which has had a section of data replaced by 0s, as if a
    block of data has not been received yet, then using the returned state
    make a new parser with the test data that has the 0s filled in
    """
    log.debug('Starting test_update')
    self.state = {StateKey.UNPROCESSED_DATA: [[0, 5000]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.TIMESTAMP: 0.0}

    # this file has a block of AD data replaced by 0s
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_replaced.dat'))
    # last one is the link to the data source
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[1447, 1833, 1, 0, 0], [4471, 4857, 1, 0, 1]],
                       [[0, 32], [222, 871], [1447, 3058],
                        [3248, 4281], [4471, 5000]],
                       self.timestamp4, self.particle_a)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[4471, 4857, 1, 0, 1]],
                       [[0, 32], [222, 871], [1833, 3058],
                        [3248, 4281], [4471, 5000]],
                       self.timestamp4, self.particle_b)
    self.stream_handle.close()

    # carry the state over to a second parser reading the file where the
    # previously missing block of CT data is filled in
    saved_state = self.parser._state
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # last one is the link to the data source
    self.parser = AdcpsParser(self.config, saved_state, self.stream_handle,
                              self.state_callback, self.pub_callback)

    # first get the old 'in process' records; once those are done, the
    # unprocessed data will be checked
    records = self.parser.get_records(1)
    self.assert_result(records, [],
                       [[0, 32], [222, 871], [1833, 3058],
                        [3248, 4281], [4857, 5000]],
                       self.timestamp4, self.particle_d)

    # this should be the first of the newly filled in particles
    records = self.parser.get_records(1)
    self.assert_result(records, [],
                       [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                        [4214, 4281], [4857, 5000]],
                       self.timestamp3, self.particle_c)
    self.stream_handle.close()
def test_in_process_start(self):
    """
    Test starting a parser with a state in the middle of processing.

    Seeds the parser with IN_PROCESS_DATA entries and verifies the first
    particle is re-emitted as a new sequence, then that the remaining
    in-process records drain in order.
    """
    log.debug('Starting test_in_process_start')
    new_state = {StateKey.IN_PROCESS_DATA: [[1447, 1833, 1, 0, 0],
                                            [3827, 4214, 1, 0, 1],
                                            [4471, 4857, 1, 0, 1]],
                 StateKey.UNPROCESSED_DATA: [[0, 32], [222, 871],
                                             [1447, 3058], [3248, 4281],
                                             [4471, 5000]],
                 StateKey.TIMESTAMP: self.timestamp4}
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # last one is the link to the data source
    self.parser = AdcpsParser(self.config, new_state, self.stream_handle,
                              self.state_callback, self.pub_callback)

    result = self.parser.get_records(1)
    # even though the state says this particle is not a new sequence, since
    # it is the first after setting the state it will be new
    self.assert_result(result,
                       [[3827, 4214, 1, 0, 1], [4471, 4857, 1, 0, 1]],
                       [[0, 32], [222, 871], [1833, 3058],
                        [3248, 4281], [4471, 5000]],
                       self.timestamp2, self.particle_b_new)

    result = self.parser.get_records(2)
    self.assertEqual(result[0], self.particle_c)
    self.assertEqual(result[1], self.particle_d)
    self.assert_state([],
                      [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                       [4214, 4281], [4857, 5000]],
                      self.timestamp4)
    self.assertEqual(self.publish_callback_value[-1], self.particle_d)
    # BUGFIX: the original test leaked the open file handle; close it like
    # the sibling tests do (close() is idempotent if tearDown also closes)
    self.stream_handle.close()
def test_mid_state_start(self):
    """
    test starting a parser with a state in the middle of processing
    """
    log.debug('Starting test_mid_state_start')
    start_state = {StateKey.IN_PROCESS_DATA: [],
                   StateKey.UNPROCESSED_DATA: [[0, 32], [222, 871],
                                               [1447, 5000]],
                   StateKey.TIMESTAMP: self.timestamp1}
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # last one is the link to the data source
    self.parser = AdcpsParser(self.config, start_state, self.stream_handle,
                              self.state_callback, self.pub_callback)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[3827, 4214, 1, 0, 1], [4471, 4857, 1, 0, 1]],
                       [[0, 32], [222, 871], [1833, 3058],
                        [3248, 4281], [4471, 5000]],
                       self.timestamp4, self.particle_b_new)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[4471, 4857, 1, 0, 1]],
                       [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                        [4214, 4281], [4471, 5000]],
                       self.timestamp4, self.particle_c)
    self.stream_handle.close()
def test_long_stream(self):
    """
    Read the full test file in one pass and spot-check the first, last,
    and resulting parser state.
    """
    log.debug('Starting test_long_stream')
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # size the unprocessed window to the whole file, then rewind
    file_contents = self.stream_handle.read()
    file_size = len(file_contents)
    self.stream_handle.seek(0)
    self.state = {StateKey.UNPROCESSED_DATA: [[0, file_size]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.TIMESTAMP: 0.0}
    # last one is the link to the data source
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback)

    records = self.parser.get_records(12)
    self.stream_handle.close()

    self.assertEqual(records[0], self.particle_a)
    self.assertEqual(records[1], self.particle_b)
    self.assertEqual(records[2], self.particle_c)
    self.assertEqual(records[3], self.particle_d)
    self.assertEqual(records[-2], self.particle_k)
    self.assertEqual(records[-1], self.particle_l)
    self.assert_state([],
                      [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                       [4214, 4281], [5047, 5153], [5539, 5730],
                       [5786, 6433], [7009, 7396], [7586, 9200],
                       [14220, 14608], [15374, 15830], [16596, 17280],
                       [17722, 17791], [19133, 22000]],
                      self.timestamp_l)
    self.assertEqual(self.publish_callback_value[-2], self.particle_k)
    self.assertEqual(self.publish_callback_value[-1], self.particle_l)
def test_get_many(self):
    """
    Read test data from the file and pull out multiple data particles at
    one time. Assert that the results are those we expected.
    """
    log.debug('Starting test_get_many')
    self.state = {StateKey.UNPROCESSED_DATA: [[0, 5000]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.TIMESTAMP: 0.0}
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # last one is the link to the data source
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback)

    records = self.parser.get_records(4)
    self.stream_handle.close()

    self.assertEqual(records, [self.particle_a, self.particle_b,
                               self.particle_c, self.particle_d])
    self.assert_state([],
                      [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                       [4214, 4281], [4857, 5000]],
                      self.timestamp4)
    self.assertEqual(self.publish_callback_value[0], self.particle_a)
    self.assertEqual(self.publish_callback_value[1], self.particle_b)
    self.assertEqual(self.publish_callback_value[2], self.particle_c)
    self.assertEqual(self.publish_callback_value[3], self.particle_d)
def test_in_process_start(self):
    """
    Test starting a parser with a state in the middle of processing.

    Seeds the parser with a populated IN_PROCESS_DATA list and verifies
    the in-process records drain in order, ending at the expected state
    with no exceptions reported.
    """
    log.debug('Starting test_in_process_start')
    new_state = {StateKey.IN_PROCESS_DATA: [[868, 1254, 1, 0],
                                            [1444, 1830, 1, 0],
                                            [2020, 2406, 1, 0],
                                            [2665, 3050, 1, 0],
                                            [3240, 3627, 1, 0],
                                            [3817, 4204, 1, 0],
                                            [4461, 4847, 1, 0]],
                 StateKey.UNPROCESSED_DATA: [[0, 32], [607, 678],
                                             [868, 1254], [1444, 1830],
                                             [2020, 2475], [2665, 3050],
                                             [3240, 3627], [3817, 4271],
                                             [4461, 5000]],
                 StateKey.FILE_SIZE: 22000}
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    self.parser = AdcpsParser(self.config, new_state, self.stream_handle,
                              self.state_callback, self.pub_callback,
                              self.exception_callback)

    result = self.parser.get_records(1)
    self.assert_result(result, B_IN_PROC, B_UN_PROC, self.particle_b)

    result = self.parser.get_records(2)
    self.assertEqual(result[0], self.particle_c)
    self.assertEqual(result[1], self.particle_d)
    self.assert_state(D_IN_PROC, D_UN_PROC)
    self.assertEqual(self.publish_callback_value[-1], self.particle_d)
    self.assertEqual(self.exception_callback_value, None)
    # BUGFIX: the original test leaked the open file handle; close it like
    # the sibling tests do (close() is idempotent if tearDown also closes)
    self.stream_handle.close()
def test_long_stream(self):
    """
    Read the full test file in one pass and spot-check the first, last,
    and resulting parser state.
    """
    log.debug('Starting test_long_stream')
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # size the unprocessed window to the whole file, then rewind
    file_contents = self.stream_handle.read()
    file_size = len(file_contents)
    self.stream_handle.seek(0)
    self.state = {StateKey.UNPROCESSED_DATA: [[0, file_size]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.FILE_SIZE: file_size}
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback,
                              self.exception_callback)

    records = self.parser.get_records(29)
    self.stream_handle.close()

    self.assertEqual(records[0], self.particle_a)
    self.assertEqual(records[1], self.particle_b)
    self.assertEqual(records[2], self.particle_c)
    self.assertEqual(records[3], self.particle_d)
    self.assertEqual(records[-2], self.particle_before_end)
    self.assertEqual(records[-1], self.particle_end)
    self.assert_state([],
                      [[0, 32], [607, 678], [2406, 2475], [4204, 4271],
                       [6161, 6230], [7958, 8027], [15738, 15807],
                       [17697, 17766], [19495, 19564], [21292, 21361],
                       [21938, 22000]])
    self.assertEqual(self.publish_callback_value[-2],
                     self.particle_before_end)
    self.assertEqual(self.publish_callback_value[-1], self.particle_end)
    self.assertEqual(self.exception_callback_value, None)
def test_get_many(self):
    """
    Read test data from the file and pull out multiple data particles at
    one time. Assert that the results are those we expected.
    """
    log.debug('Starting test_get_many')
    self.state = {StateKey.UNPROCESSED_DATA: [[0, 5000]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.FILE_SIZE: 22000}
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback,
                              self.exception_callback)

    records = self.parser.get_records(5)
    self.stream_handle.close()

    self.assertEqual(records, [self.particle_a, self.particle_b,
                               self.particle_c, self.particle_d,
                               self.particle_e])
    self.assert_state([[3240, 3627, 1, 0], [3817, 4204, 1, 0],
                       [4461, 4847, 1, 0]],
                      [[0, 32], [607, 678], [2406, 2475], [3240, 3627],
                       [3817, 4271], [4461, 5000]])
    self.assertEqual(self.publish_callback_value[0], self.particle_a)
    self.assertEqual(self.publish_callback_value[1], self.particle_b)
    self.assertEqual(self.publish_callback_value[2], self.particle_c)
    self.assertEqual(self.publish_callback_value[3], self.particle_d)
    self.assertEqual(self.publish_callback_value[4], self.particle_e)
    self.assertEqual(self.exception_callback_value, None)
def test_simple(self):
    """
    Read test data from the file and pull out data particles one at a
    time. Assert that the results are those we expected.
    """
    log.debug('Starting test_simple')
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # NOTE: using the unprocessed data state of 0,5000 limits the file to
    # reading just 5000 bytes, so even though the file is longer it only
    # reads the first 5000
    self.state = {StateKey.UNPROCESSED_DATA: [[0, 5000]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.FILE_SIZE: 22000}
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback,
                              self.exception_callback)

    # pull out particles a through d one at a time, checking the state
    # after each against the shared expected-state constants
    records = self.parser.get_records(1)
    self.assert_result(records, A_IN_PROC, A_UN_PROC, self.particle_a)
    records = self.parser.get_records(1)
    self.assert_result(records, B_IN_PROC, B_UN_PROC, self.particle_b)
    records = self.parser.get_records(1)
    self.assert_result(records, C_IN_PROC, C_UN_PROC, self.particle_c)
    records = self.parser.get_records(1)
    self.assert_result(records, D_IN_PROC, D_UN_PROC, self.particle_d)

    self.stream_handle.close()
    self.assertEqual(self.exception_callback_value, None)
def _build_parser(self, parser_state, infile):
    """
    Build and return the parser.

    @param parser_state state dict to resume the parser from (or None)
    @param infile open file handle the parser reads from
    @retval the newly constructed AdcpsParser
    """
    # BUGFIX: copy the shared driver config before updating it -- the
    # original wrote through self._parser_config, mutating it in place so
    # the particle module/class keys leaked into every later use of the
    # config dict
    config = dict(self._parser_config)
    config.update({
        'particle_module': 'mi.dataset.parser.adcps',
        'particle_class': 'AdcpsParserDataParticle'
    })
    log.debug("MYCONFIG: %s", config)
    self._parser = AdcpsParser(config, parser_state, infile,
                               self._save_parser_state, self._data_callback)
    return self._parser
def test_set_state(self):
    """
    test changing the state after initializing
    """
    log.debug('Starting test_set_state')
    self.state = {StateKey.UNPROCESSED_DATA: [[0, 3800]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.FILE_SIZE: 22000}
    # add in c particle as unprocessed
    replacement_state = {StateKey.UNPROCESSED_DATA: [[0, 32], [607, 678],
                                                     [1444, 1830],
                                                     [2406, 2475],
                                                     [2665, 3050],
                                                     [3800, 5000]],
                         StateKey.IN_PROCESS_DATA: [],
                         StateKey.FILE_SIZE: 22000}
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback,
                              self.exception_callback)

    # there should only be 6 records, make sure we stop there
    records = self.parser.get_records(6)
    self.assert_state([], [[0, 32], [607, 678], [2406, 2475],
                           [3627, 3800]])
    records = self.parser.get_records(1)
    self.assertEqual(records, [])

    # swap in the new state and verify the re-added ranges are processed
    self.parser.set_state(replacement_state)
    records = self.parser.get_records(1)
    self.assert_result(records, [],
                       [[0, 32], [607, 678], [2406, 2475], [2665, 3050],
                        [3800, 5000]],
                       self.particle_c)
    records = self.parser.get_records(1)
    self.assert_result(records, [],
                       [[0, 32], [607, 678], [2406, 2475], [3800, 5000]],
                       self.particle_e)
    self.assertEqual(self.exception_callback_value, None)
    self.stream_handle.close()
def test_simple(self):
    """
    Read test data from the file and pull out data particles one at a
    time. Assert that the results are those we expected.
    """
    log.debug('Starting test_simple')
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # NOTE: using the unprocessed data state of 0,5000 limits the file to
    # reading just 5000 bytes, so even though the file is longer it only
    # reads the first 5000
    self.state = {StateKey.UNPROCESSED_DATA: [[0, 5000]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.TIMESTAMP: 0.0}
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[1447, 1833, 1, 0, 0], [3827, 4214, 1, 0, 1],
                        [4471, 4857, 1, 0, 1]],
                       [[0, 32], [222, 871], [1447, 3058],
                        [3248, 4281], [4471, 5000]],
                       self.timestamp4, self.particle_a)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[3827, 4214, 1, 0, 1], [4471, 4857, 1, 0, 1]],
                       [[0, 32], [222, 871], [1833, 3058],
                        [3248, 4281], [4471, 5000]],
                       self.timestamp4, self.particle_b)

    records = self.parser.get_records(1)
    self.assert_result(records,
                       [[4471, 4857, 1, 0, 1]],
                       [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                        [4214, 4281], [4471, 5000]],
                       self.timestamp4, self.particle_c)

    records = self.parser.get_records(1)
    self.assert_result(records, [],
                       [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                        [4214, 4281], [4857, 5000]],
                       self.timestamp4, self.particle_d)
    self.stream_handle.close()
def test_mid_state_start(self):
    """
    test starting a parser with a state in the middle of processing
    """
    log.debug('Starting test_mid_state_start')
    start_state = {StateKey.IN_PROCESS_DATA: [],
                   StateKey.UNPROCESSED_DATA: [[0, 32], [607, 678],
                                               [1444, 5000]],
                   StateKey.FILE_SIZE: 22000}
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    self.parser = AdcpsParser(self.config, start_state, self.stream_handle,
                              self.state_callback, self.pub_callback,
                              self.exception_callback)

    # resuming mid-file should pick up at particle c, then d
    records = self.parser.get_records(1)
    self.assert_result(records, C_IN_PROC, C_UN_PROC, self.particle_c)
    records = self.parser.get_records(1)
    self.assert_result(records, D_IN_PROC, D_UN_PROC, self.particle_d)

    self.stream_handle.close()
    self.assertEqual(self.exception_callback_value, None)
def test_set_state(self):
    """
    test changing the state after initializing
    """
    log.debug('Starting test_set_state')
    self.state = {StateKey.UNPROCESSED_DATA: [[0, 4500]],
                  StateKey.IN_PROCESS_DATA: [],
                  StateKey.TIMESTAMP: 0.0}
    replacement_state = {StateKey.UNPROCESSED_DATA: [[0, 32], [222, 871],
                                                     [1833, 3058],
                                                     [3248, 3827],
                                                     [4214, 4281],
                                                     [4471, 5000]],
                         StateKey.IN_PROCESS_DATA: [],
                         StateKey.TIMESTAMP: self.timestamp2}
    self.stream_handle = open(os.path.join(RESOURCE_PATH,
                                           'node59p1_shorter.dat'))
    # last one is the link to the data source
    self.parser = AdcpsParser(self.config, self.state, self.stream_handle,
                              self.state_callback, self.pub_callback)

    # there should only be 6 records, make sure we stop there
    records = self.parser.get_records(6)
    self.assert_state([],
                      [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                       [4214, 4281], [4471, 4500]],
                      self.timestamp3)
    records = self.parser.get_records(1)
    self.assertEqual(records, [])

    # swap in the new state and verify the next particle is produced
    self.parser.set_state(replacement_state)
    records = self.parser.get_records(1)
    self.stream_handle.close()
    self.assert_result(records, [],
                       [[0, 32], [222, 871], [1833, 3058], [3248, 3827],
                        [4214, 4281], [4857, 5000]],
                       self.timestamp4, self.particle_d)