Ejemplo n.º 1
0
    def test_get_many(self):
        """
        Retrieve particles in two consecutive batches (20, then 30) from the same parser and
        verify every particle of each batch against the expected results in get_many.yml.
        """

        file_path = os.path.join(RESOURCE_PATH, 'get_many.mpk')

        # The context manager closes the file even when an assertion below fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(20)

            # Should end up with 20 particles
            self.assertEqual(len(particles), 20)

            test_data = self.get_dict_from_yml('get_many.yml')

            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)

            particles = parser.get_records(30)

            # Should end up with 30 particles
            self.assertEqual(len(particles), 30)

            # Expected results for the second batch are offset by the 20
            # particles already retrieved.
            for i, particle in enumerate(particles, 20):
                self.assert_result(test_data['data'][i], particle)
Ejemplo n.º 2
0
    def test_mid_state_start(self):
        """
        Start the parser with a state that reports 20 particles already returned, retrieve
        4 particles and verify each of them against entries 20..23 of set_state.yml.
        """

        # Using two concatenated msgpack files to simulate two chunks.
        file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

        # Start past the first 20 particles
        state = {StateKey.PARTICLES_RETURNED: 20}

        # The context manager closes the file even when an assertion below fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, state, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(4)

            log.info(len(particles))

            # Should end up with 4 particles
            self.assertEqual(len(particles), 4)

            test_data = self.get_dict_from_yml('set_state.yml')

            # Expected results are offset by the 20 particles skipped via the state.
            for i, particle in enumerate(particles, 20):
                self.assert_result(test_data['data'][i], particle)
Ejemplo n.º 3
0
    def test_bad_data_one(self):
        """
        Verify that a SampleException is raised when msgpack data is malformed.

        The good archive is parsed first as a baseline (40 particles expected even though
        100 are requested); the malformed archive must then raise on the first request.
        """

        file_path = os.path.join(RESOURCE_PATH, 'acs_archive.mpk')

        # Context managers close each file even when an assertion fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(100)

            self.assertEqual(len(particles), 40)

        file_path = os.path.join(RESOURCE_PATH, 'acs_archive_BAD.mpk')

        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            with self.assertRaises(SampleException):
                parser.get_records(1)
Ejemplo n.º 4
0
    def test_mid_state_start(self):
        """
        Start the parser with a state that reports 20 particles already returned, retrieve
        4 particles and verify each of them against entries 20..23 of set_state.yml.
        """

        # Using two concatenated msgpack files to simulate two chunks.
        file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

        # Start past the first 20 particles
        state = {StateKey.PARTICLES_RETURNED: 20}

        # The context manager closes the file even when an assertion below fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, state, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(4)

            # Should end up with 4 particles
            self.assertEqual(len(particles), 4)

            test_data = self.get_dict_from_yml('set_state.yml')

            # Expected results are offset by the 20 particles skipped via the state.
            for i, particle in enumerate(particles, 20):
                self.assert_result(test_data['data'][i], particle)
Ejemplo n.º 5
0
    def test_get_many(self):
        """
        Retrieve particles in two consecutive batches (20, then 30) from the same parser and
        verify every particle of each batch against the expected results in get_many.yml.
        """

        file_path = os.path.join(RESOURCE_PATH, 'get_many.mpk')

        # The context manager closes the file even when an assertion below fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(20)

            # Should end up with 20 particles
            self.assertEqual(len(particles), 20)

            test_data = self.get_dict_from_yml('get_many.yml')

            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)

            particles = parser.get_records(30)

            # Should end up with 30 particles
            self.assertEqual(len(particles), 30)

            # Expected results for the second batch are offset by the 20
            # particles already retrieved.
            for i, particle in enumerate(particles, 20):
                self.assert_result(test_data['data'][i], particle)
    def test_bad_data_two(self):
        """
        Verify that a SampleException is raised when an entire msgpack buffer is not msgpack.
        """

        file_path = os.path.join(RESOURCE_PATH, "not-msg-pack.mpk")

        # The context manager closes the file even when the expected exception is not raised.
        with open(file_path, "rb") as stream_handle:
            parser = OptaaAcMmpCdsParser(
                self.config, None, stream_handle, self.state_callback, self.pub_callback
            )

            with self.assertRaises(SampleException):
                parser.get_records(1)
Ejemplo n.º 7
0
    def test_bad_data_two(self):
        """
        Verify that a SampleException is raised when an entire msgpack buffer is not msgpack.
        """

        file_path = os.path.join(RESOURCE_PATH, 'not-msg-pack.mpk')

        # The context manager closes the file even when the expected exception is not raised.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            with self.assertRaises(SampleException):
                parser.get_records(1)
    def test_simple(self):
        """
        Read a single particle from simple.mpk and verify it against the first expected
        result in simple.yml.
        """

        file_path = os.path.join(RESOURCE_PATH, "simple.mpk")

        # The context manager closes the file even when the verification fails.
        with open(file_path, "rb") as stream_handle:
            parser = OptaaAcMmpCdsParser(
                self.config, None, stream_handle, self.state_callback, self.pub_callback
            )

            particles = parser.get_records(1)

            test_data = self.get_dict_from_yml("simple.yml")
            self.assert_result(test_data["data"][0], particles[0])
Ejemplo n.º 9
0
    def test_simple(self):
        """
        Read a single particle from simple.mpk and verify it against the first expected
        result in simple.yml.
        """

        file_path = os.path.join(RESOURCE_PATH, 'simple.mpk')

        # The context manager closes the file even when the verification fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(1)

            test_data = self.get_dict_from_yml('simple.yml')
            self.assert_result(test_data['data'][0], particles[0])
Ejemplo n.º 10
0
    def test_bad_data_one(self):
        """
        Verify that a SampleException is raised when msgpack data is malformed.

        The good archive is parsed first as a baseline (40 particles expected even though
        100 are requested); the malformed archive must then raise on the first request.
        """

        file_path = os.path.join(RESOURCE_PATH, 'acs_archive.mpk')

        # Context managers close each file even when an assertion fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(100)

            self.assertEqual(len(particles), 40)

        file_path = os.path.join(RESOURCE_PATH, 'acs_archive_BAD.mpk')

        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            with self.assertRaises(SampleException):
                parser.get_records(1)
    def test_long_stream(self):
        """
        Retrieve 500 particles from large_import.mpk in a single request and verify each
        of them against the expected results in large_import.yml.
        """

        file_path = os.path.join(RESOURCE_PATH, "large_import.mpk")

        # The context manager closes the file even when an assertion below fails.
        with open(file_path, "rb") as stream_handle:
            parser = OptaaAcMmpCdsParser(
                self.config, None, stream_handle, self.state_callback, self.pub_callback
            )

            # Attempt to retrieve 500 particles
            particles = parser.get_records(500)

            # Should end up with 500 particles
            self.assertEqual(len(particles), 500)

            test_data = self.get_dict_from_yml("large_import.yml")

            for i, particle in enumerate(particles):
                self.assert_result(test_data["data"][i], particle)
Ejemplo n.º 12
0
    def test_long_stream(self):
        """
        Retrieve 500 particles from large_import.mpk in a single request and verify each
        of them against the expected results in large_import.yml.
        """

        file_path = os.path.join(RESOURCE_PATH, 'large_import.mpk')

        # The context manager closes the file even when an assertion below fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            # Attempt to retrieve 500 particles
            particles = parser.get_records(500)

            # Should end up with 500 particles
            self.assertEqual(len(particles), 500)

            test_data = self.get_dict_from_yml('large_import.yml')

            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)
    def _build_parser(self, stream_handle):
        """
        Build and return an OptaaAcMmpCdsParser that reads from stream_handle.

        The parser starts with no saved state. The fourth and fifth constructor
        arguments are callbacks that do nothing, and exceptions are routed to
        self._exception_callback.
        """

        def ignore_state(state, ingested):
            return None

        def ignore_data(data):
            return None

        particle_settings = {
            DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.optaa_ac_mmp_cds',
            DataSetDriverConfigKeys.PARTICLE_CLASS: 'OptaaAcMmpCdsParserDataParticle'
        }

        return OptaaAcMmpCdsParser(
            particle_settings,
            None,
            stream_handle,
            ignore_state,
            ignore_data,
            self._exception_callback,
        )
Ejemplo n.º 14
0
 def _build_parser(self, parser_state, infile):
     """
     Create the parser for the given state and input file, store it on
     self._parser and return it.
     """
     config = self._parser_config
     particle_settings = {
         DataSetDriverConfigKeys.PARTICLE_MODULE: 'mi.dataset.parser.optaa_ac_mmp_cds',
         DataSetDriverConfigKeys.PARTICLE_CLASS: 'OptaaAcMmpCdsParserDataParticle',
     }
     # update() modifies self._parser_config in place, since config is the same object.
     config.update(particle_settings)
     log.debug("My Config: %s", config)

     parser = OptaaAcMmpCdsParser(
         config,
         parser_state,
         infile,
         self._save_parser_state,
         self._data_callback,
         self._sample_exception_callback,
     )
     self._parser = parser
     return self._parser
Ejemplo n.º 15
0
    def test_set_state(self):
        """
        Exercise set_state on the parser: read the first 4 particles and capture the state,
        re-read them from a fresh parser, restore the captured state and read the next 4,
        reset to the beginning, check that invalid states are rejected, re-read the whole
        file, and finally start a parser from a state for which no particles are expected.
        """

        # Using the default msgpack test file.
        file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

        # The context manager closes the file even when an assertion below fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(4)

            # Should end up with 4 particles
            self.assertEqual(len(particles), 4)

            test_data = self.get_dict_from_yml('set_state.yml')

            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)

            # Keep a copy of the state reached after the first 4 particles
            state = copy.copy(parser._state)

            # Re-create the parser with a state of None
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            # Retrieve the first 4 particles again
            particles = parser.get_records(4)

            # Check the particles again
            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)

            # Set the parser's state
            parser.set_state(state)

            particles = parser.get_records(4)

            # Should end up with 4 particles
            self.assertEqual(len(particles), 4)

            # Expected results are offset by the 4 particles covered by the saved state.
            for i, particle in enumerate(particles, 4):
                self.assert_result(test_data['data'][i], particle)

            # Reset the state to 0 particles returned
            parser.set_state({StateKey.PARTICLES_RETURNED: 0})

            particles = parser.get_records(1)

            self.assertEqual(len(particles), 1)

            # Check the particle
            self.assert_result(test_data['data'][0], particles[0])

            # Set the state to a bad state (i.e. None)
            with self.assertRaises(DatasetParserException):
                parser.set_state(None)

            # Set the state to a bad state (i.e. a list)
            with self.assertRaises(DatasetParserException):
                parser.set_state(['particles_returned'])

            # Set the state to 0 particles returned
            parser.set_state({StateKey.PARTICLES_RETURNED: 0})

            # Attempt to retrieve 1000 particles
            particles = parser.get_records(1000)

            # Ensure we got all expected 30
            self.assertEqual(len(particles), 30)

            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)

            # Provide a bad particles returned
            state = {StateKey.PARTICLES_RETURNED: 80}

            parser = OptaaAcMmpCdsParser(self.config, state, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(1)

            # No particles are expected for that state
            self.assertEqual(len(particles), 0)
Ejemplo n.º 16
0
    def test_set_state(self):
        """
        Exercise set_state on the parser: read the first 4 particles and capture the state,
        re-read them from a fresh parser, restore the captured state and read the next 4,
        reset to the beginning, check that invalid states are rejected, re-read the whole
        file, and finally start a parser from a state for which no particles are expected.
        """

        # Using the default msgpack test file.
        file_path = os.path.join(RESOURCE_PATH, 'set_state.mpk')

        # The context manager closes the file even when an assertion below fails.
        with open(file_path, 'rb') as stream_handle:
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(4)

            # Should end up with 4 particles
            self.assertEqual(len(particles), 4)

            log.info(parser._state)

            test_data = self.get_dict_from_yml('set_state.yml')

            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)

            # Keep a copy of the state reached after the first 4 particles
            state = copy.copy(parser._state)

            log.info(state)

            # Re-create the parser with a state of None
            parser = OptaaAcMmpCdsParser(self.config, None, stream_handle,
                                         self.state_callback, self.pub_callback)

            # Retrieve the first 4 particles again
            particles = parser.get_records(4)

            # Check the particles again
            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)

            # Set the parser's state
            parser.set_state(state)

            particles = parser.get_records(4)

            # Should end up with 4 particles
            self.assertEqual(len(particles), 4)

            # Expected results are offset by the 4 particles covered by the saved state.
            for i, particle in enumerate(particles, 4):
                self.assert_result(test_data['data'][i], particle)

            # Reset the state to 0 particles returned
            parser.set_state({StateKey.PARTICLES_RETURNED: 0})

            particles = parser.get_records(1)

            self.assertEqual(len(particles), 1)

            # Check the particle
            self.assert_result(test_data['data'][0], particles[0])

            # Set the state to a bad state (i.e. None)
            with self.assertRaises(DatasetParserException):
                parser.set_state(None)

            # Set the state to a bad state (i.e. a list)
            with self.assertRaises(DatasetParserException):
                parser.set_state(['particles_returned'])

            # Set the state to 0 particles returned
            parser.set_state({StateKey.PARTICLES_RETURNED: 0})

            # Attempt to retrieve 1000 particles
            particles = parser.get_records(1000)

            # Ensure we got all expected 30
            self.assertEqual(len(particles), 30)

            for i, particle in enumerate(particles):
                self.assert_result(test_data['data'][i], particle)

            # Provide a bad particles returned
            state = {StateKey.PARTICLES_RETURNED: 80}

            parser = OptaaAcMmpCdsParser(self.config, state, stream_handle,
                                         self.state_callback, self.pub_callback)

            particles = parser.get_records(1)

            # No particles are expected for that state
            self.assertEqual(len(particles), 0)