def test_get_many(self):
    """
    Retrieve 20 particles and then 30 more from 'get_many.mpk', checking
    every returned particle against the matching entry of 'get_many.yml'.
    """
    file_path = os.path.join(RESOURCE_PATH, 'get_many.mpk')

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(20)

        # Should end up with 20 particles
        self.assertEqual(len(particles), 20)

        test_data = self.get_dict_from_yml('get_many.yml')
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)

        particles = parser.get_records(30)

        # Should end up with 30 particles
        self.assertEqual(len(particles), 30)

        # The second batch is compared against the entries following the
        # first 20 expected results.
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i + 20], particle)
def test_mid_state_start(self):
    """
    Start the parser with a state reporting 20 particles already returned,
    retrieve 4 particles and check them against entries 20..23 of
    'set_state.yml'.
    """
    # Using two concatenated msgpack files to simulate two chunks.
    file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

    # Initial state: 20 particles have already been returned
    state = {StateKey.PARTICLES_RETURNED: 20}

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, state, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(4)

        log.info(len(particles))

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        test_data = self.get_dict_from_yml('set_state.yml')
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][20 + i], particle)
def test_bad_data_one(self):
    """
    Verify that 'acs_archive.mpk' yields 40 particles when 100 are
    requested, and that a SampleException is raised when the msgpack data
    is malformed ('acs_archive_BAD.mpk').
    """
    # Each file is opened in its own context manager so that it is closed
    # even if the assertions on it fail.
    file_path = os.path.join(RESOURCE_PATH, 'acs_archive.mpk')
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(100)

        self.assertEqual(len(particles), 40)

    file_path = os.path.join(RESOURCE_PATH, 'acs_archive_BAD.mpk')
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        with self.assertRaises(SampleException):
            parser.get_records(1)
def test_mid_state_start(self):
    """
    Start the parser with a state reporting 20 particles already returned,
    retrieve 4 particles and check them against entries 20..23 of
    'set_state.yml'.
    """
    # Using two concatenated msgpack files to simulate two chunks.
    file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

    # Initial state: 20 particles have already been returned
    state = {StateKey.PARTICLES_RETURNED: 20}

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, state, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(4)

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        test_data = self.get_dict_from_yml('set_state.yml')
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][20 + i], particle)
def test_get_many(self):
    """
    Retrieve 20 particles and then 30 more from 'get_many.mpk', checking
    every returned particle against the matching entry of 'get_many.yml'.
    """
    file_path = os.path.join(RESOURCE_PATH, 'get_many.mpk')

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(20)

        # Should end up with 20 particles
        self.assertEqual(len(particles), 20)

        test_data = self.get_dict_from_yml('get_many.yml')
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)

        particles = parser.get_records(30)

        # Should end up with 30 particles
        self.assertEqual(len(particles), 30)

        # The second batch is compared against the entries following the
        # first 20 expected results.
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i + 20], particle)
def test_bad_data_two(self):
    """
    Verify that a SampleException is raised when the entire input buffer
    ("not-msg-pack.mpk") is not msgpack.
    """
    file_path = os.path.join(RESOURCE_PATH, "not-msg-pack.mpk")

    # The context manager closes the file even if the expected exception
    # is not raised and the assertion fails.
    with open(file_path, "rb") as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        with self.assertRaises(SampleException):
            parser.get_records(1)
def test_bad_data_two(self):
    """
    Verify that a SampleException is raised when the entire input buffer
    ('not-msg-pack.mpk') is not msgpack.
    """
    file_path = os.path.join(RESOURCE_PATH, 'not-msg-pack.mpk')

    # The context manager closes the file even if the expected exception
    # is not raised and the assertion fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        with self.assertRaises(SampleException):
            parser.get_records(1)
def test_simple(self):
    """
    Read a single particle from "simple.mpk" and check it against the
    first entry of "simple.yml".
    """
    file_path = os.path.join(RESOURCE_PATH, "simple.mpk")

    # The context manager closes the file even if the check below fails.
    with open(file_path, "rb") as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(1)

        test_data = self.get_dict_from_yml("simple.yml")
        self.assert_result(test_data["data"][0], particles[0])
def test_simple(self):
    """
    Read a single particle from 'simple.mpk' and check it against the
    first entry of 'simple.yml'.
    """
    file_path = os.path.join(RESOURCE_PATH, 'simple.mpk')

    # The context manager closes the file even if the check below fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(1)

        test_data = self.get_dict_from_yml('simple.yml')
        self.assert_result(test_data['data'][0], particles[0])
def test_bad_data_one(self):
    """
    Verify that 'acs_archive.mpk' yields 40 particles when 100 are
    requested, and that a SampleException is raised when the msgpack data
    is malformed ('acs_archive_BAD.mpk').
    """
    # Each file is opened in its own context manager so that it is closed
    # even if the assertions on it fail.
    file_path = os.path.join(RESOURCE_PATH, 'acs_archive.mpk')
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(100)

        self.assertEqual(len(particles), 40)

    file_path = os.path.join(RESOURCE_PATH, 'acs_archive_BAD.mpk')
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        with self.assertRaises(SampleException):
            parser.get_records(1)
def test_long_stream(self):
    """
    Retrieve 500 particles from "large_import.mpk" and check each one
    against the matching entry of "large_import.yml".
    """
    file_path = os.path.join(RESOURCE_PATH, "large_import.mpk")

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, "rb") as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        # Attempt to retrieve 500 particles
        particles = parser.get_records(500)

        # Should end up with 500 particles
        self.assertEqual(len(particles), 500)

        test_data = self.get_dict_from_yml("large_import.yml")
        for i, particle in enumerate(particles):
            self.assert_result(test_data["data"][i], particle)
def test_long_stream(self):
    """
    Retrieve 500 particles from 'large_import.mpk' and check each one
    against the matching entry of 'large_import.yml'.
    """
    file_path = os.path.join(RESOURCE_PATH, 'large_import.mpk')

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        # Attempt to retrieve 500 particles
        particles = parser.get_records(500)

        # Should end up with 500 particles
        self.assertEqual(len(particles), 500)

        test_data = self.get_dict_from_yml('large_import.yml')
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)
def _build_parser(self, stream_handle):
    """
    Create an OptaaAcMmpCdsParser that reads from stream_handle.

    The parser is given no initial state, state and publish callbacks that
    do nothing, and self._exception_callback as its exception callback.
    """
    def ignore_state(state, ingested):
        # State updates are discarded.
        return None

    def ignore_data(data):
        # Published data is discarded.
        return None

    particle_settings = {
        DataSetDriverConfigKeys.PARTICLE_MODULE:
            'mi.dataset.parser.optaa_ac_mmp_cds',
        DataSetDriverConfigKeys.PARTICLE_CLASS:
            'OptaaAcMmpCdsParserDataParticle',
    }

    return OptaaAcMmpCdsParser(particle_settings,
                               None,
                               stream_handle,
                               ignore_state,
                               ignore_data,
                               self._exception_callback)
def _build_parser(self, parser_state, infile):
    """
    Build the parser, store it on self._parser and return it.

    Note that self._parser_config is updated in place with the particle
    module and particle class entries before being handed to the parser.
    """
    particle_settings = {
        DataSetDriverConfigKeys.PARTICLE_MODULE:
            'mi.dataset.parser.optaa_ac_mmp_cds',
        DataSetDriverConfigKeys.PARTICLE_CLASS:
            'OptaaAcMmpCdsParserDataParticle',
    }

    # Same object as self._parser_config, not a copy.
    settings = self._parser_config
    settings.update(particle_settings)
    log.debug("My Config: %s", settings)

    self._parser = OptaaAcMmpCdsParser(
        settings,
        parser_state,
        infile,
        self._save_parser_state,
        self._data_callback,
        self._sample_exception_callback
    )
    return self._parser
def test_set_state(self):
    """
    Exercise set_state on the parser: capture the state after 4 particles,
    re-apply it on a fresh parser, reset to 0 particles returned, reject
    invalid states, read all particles from the start, and finally start a
    parser with a particles-returned count for which no particles are
    expected.
    """
    # Using the default mspack test file.
    file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(4)

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        test_data = self.get_dict_from_yml('set_state.yml')
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)

        # Keep a copy of the state reached after the first 4 particles
        state = copy.copy(parser._state)

        # Re-create the parser with a state of None
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        # Retrieve the first 4 particles again
        particles = parser.get_records(4)

        # Check the particles again
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)

        # Set the parser's state
        parser.set_state(state)

        particles = parser.get_records(4)

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][4 + i], particle)

        # Reset the state to 0 particles returned
        parser.set_state({StateKey.PARTICLES_RETURNED: 0})

        particles = parser.get_records(1)
        self.assertEqual(len(particles), 1)

        # Check the particle
        self.assert_result(test_data['data'][0], particles[0])

        # Set the state to a bad state (i.e. None)
        with self.assertRaises(DatasetParserException):
            parser.set_state(None)

        # Set the state to a bad state (i.e. a list)
        with self.assertRaises(DatasetParserException):
            parser.set_state(['particles_returned'])

        # Set the state to 0 particles returned
        parser.set_state({StateKey.PARTICLES_RETURNED: 0})

        # Attempt to retrieve 1000 particles
        particles = parser.get_records(1000)

        # Ensure we got all expected 30
        self.assertEqual(len(particles), 30)

        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)

        # Provide a bad particles returned
        state = {StateKey.PARTICLES_RETURNED: 80}

        parser = OptaaAcMmpCdsParser(self.config, state, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(1)

        self.assertEqual(len(particles), 0)
def test_set_state(self):
    """
    Exercise set_state on the parser: capture the state after 4 particles,
    re-apply it on a fresh parser, reset to 0 particles returned, reject
    invalid states, read all particles from the start, and finally start a
    parser with a particles-returned count for which no particles are
    expected.
    """
    # Using the default mspack test file.
    file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

    # The context manager closes the file even if an assertion below fails.
    with open(file_path, 'rb') as stream_handle:
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(4)

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        log.info(parser._state)

        test_data = self.get_dict_from_yml('set_state.yml')
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)

        # Keep a copy of the state reached after the first 4 particles
        state = copy.copy(parser._state)

        log.info(state)

        # Re-create the parser with a state of None
        parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                     self.state_callback, self.pub_callback)

        # Retrieve the first 4 particles again
        particles = parser.get_records(4)

        # Check the particles again
        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)

        # Set the parser's state
        parser.set_state(state)

        particles = parser.get_records(4)

        # Should end up with 4 particles
        self.assertEqual(len(particles), 4)

        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][4 + i], particle)

        # Reset the state to 0 particles returned
        parser.set_state({StateKey.PARTICLES_RETURNED: 0})

        particles = parser.get_records(1)
        self.assertEqual(len(particles), 1)

        # Check the particle
        self.assert_result(test_data['data'][0], particles[0])

        # Set the state to a bad state (i.e. None)
        with self.assertRaises(DatasetParserException):
            parser.set_state(None)

        # Set the state to a bad state (i.e. a list)
        with self.assertRaises(DatasetParserException):
            parser.set_state(['particles_returned'])

        # Set the state to 0 particles returned
        parser.set_state({StateKey.PARTICLES_RETURNED: 0})

        # Attempt to retrieve 1000 particles
        particles = parser.get_records(1000)

        # Ensure we got all expected 30
        self.assertEqual(len(particles), 30)

        for i, particle in enumerate(particles):
            self.assert_result(test_data['data'][i], particle)

        # Provide a bad particles returned
        state = {StateKey.PARTICLES_RETURNED: 80}

        parser = OptaaAcMmpCdsParser(self.config, state, stream_handle,
                                     self.state_callback, self.pub_callback)

        particles = parser.get_records(1)

        self.assertEqual(len(particles), 0)