Example 1
    def testCompressIO(self):
        if not np:
            return
        import pyarrow
        from numpy.testing import assert_array_equal

        data = np.random.random((1000, 100))
        serialized = pyarrow.serialize(data).to_buffer()

        # compress the serialized buffer through an LZ4 reader in 128-byte chunks
        bio = BytesIO()
        reader = dataserializer.CompressBufferReader(
            pyarrow.py_buffer(serialized), dataserializer.COMPRESS_FLAG_LZ4)
        while True:
            block = reader.read(128)
            if not block:
                break
            bio.write(block)

        compressed = bio.getvalue()
        assert_array_equal(data, dataserializer.loads(compressed))

        # decompress the blob chunk-by-chunk into a pre-allocated sink buffer
        data_sink = bytearray(len(serialized))
        compressed_mv = memoryview(compressed)
        writer = dataserializer.DecompressBufferWriter(
            pyarrow.py_buffer(data_sink))
        pos = 0
        while pos < len(compressed):
            endpos = min(pos + 128, len(compressed))
            writer.write(compressed_mv[pos:endpos])
            pos = endpos

        assert_array_equal(data, pyarrow.deserialize(data_sink))
Example 2
    def testDataSerialize(self):
        try:
            import numpy as np
            from numpy.testing import assert_array_equal
        except ImportError:
            np = None

        try:
            import scipy.sparse as sps
        except ImportError:
            sps = None

        if np:
            array = np.random.rand(1000, 100)
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
                array, compress=dataserializer.COMPRESS_FLAG_LZ4)))

            array = np.random.rand(1000, 100)
            assert_array_equal(array, dataserializer.load(BytesIO(dataserializer.dumps(array))))
            assert_array_equal(array, dataserializer.load(BytesIO(dataserializer.dumps(
                array, compress=dataserializer.COMPRESS_FLAG_LZ4))))

            array = np.random.rand(1000, 100).T  # test non c-contiguous
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
                array, compress=dataserializer.COMPRESS_FLAG_LZ4)))

            array = np.float64(0.2345)
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
                array, compress=dataserializer.COMPRESS_FLAG_LZ4)))

            fn = os.path.join(tempfile.gettempdir(), 'test_dump_file_%d.bin' % id(self))
            try:
                array = np.random.rand(1000, 100).T  # test non c-contiguous
                with open(fn, 'wb') as dump_file:
                    dataserializer.dump(array, dump_file)
                with open(fn, 'rb') as dump_file:
                    assert_array_equal(array, dataserializer.load(dump_file))
                with open(fn, 'wb') as dump_file:
                    dataserializer.dump(array, dump_file,
                                        compress=dataserializer.COMPRESS_FLAG_LZ4)
                with open(fn, 'rb') as dump_file:
                    assert_array_equal(array, dataserializer.load(dump_file))
            finally:
                if os.path.exists(fn):
                    os.unlink(fn)

        if sps:
            mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
            des_mat = dataserializer.loads(dataserializer.dumps(mat))
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

            des_mat = dataserializer.loads(dataserializer.dumps(
                mat, compress=dataserializer.COMPRESS_FLAG_LZ4))
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)
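
For quick reference, a minimal round trip outside the test harness might look like the sketch below. The dumps/loads/load calls and the COMPRESS_FLAG_LZ4 constant are the ones exercised above; the import path and the array shape are assumptions.

import numpy as np
from io import BytesIO
from mars.serialize import dataserializer  # import path assumed; adjust to the project layout

data = np.random.rand(10, 5)

# in-memory round trip, plain and LZ4-compressed
raw = dataserializer.dumps(data)
lz4 = dataserializer.dumps(data, compress=dataserializer.COMPRESS_FLAG_LZ4)
assert np.array_equal(data, dataserializer.loads(raw))
assert np.array_equal(data, dataserializer.loads(lz4))

# stream-style round trip from a file-like object
assert np.array_equal(data, dataserializer.load(BytesIO(lz4)))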
Example 3
def _read_data_batch(reader):
    # io_size (the read chunk size) is expected to be defined in the enclosing scope
    bio = BytesIO()
    with reader:
        while True:
            buf = reader.read(io_size)
            if buf:
                bio.write(buf)
            else:
                break
    return dataserializer.loads(bio.getvalue())
Example 4
    def testArrowBufferIO(self):
        if not np:
            return
        import pyarrow
        from numpy.testing import assert_array_equal

        for compress in [
                dataserializer.CompressType.LZ4,
                dataserializer.CompressType.GZIP
        ]:
            if compress not in dataserializer.get_supported_compressions():
                continue

            data = np.random.random((1000, 100))
            serialized = pyarrow.serialize(data).to_buffer()

            # test complete read
            reader = ArrowBufferIO(pyarrow.py_buffer(serialized),
                                   'r',
                                   compress_out=compress)
            assert_array_equal(data, dataserializer.loads(reader.read()))

            # test partial read
            reader = ArrowBufferIO(pyarrow.py_buffer(serialized),
                                   'r',
                                   compress_out=compress)
            block = reader.read(128)
            data_left = reader.read()
            assert_array_equal(data, dataserializer.loads(block + data_left))

            # test read by chunks
            bio = BytesIO()
            reader = ArrowBufferIO(pyarrow.py_buffer(serialized),
                                   'r',
                                   compress_out=compress)
            while True:
                block = reader.read(128)
                if not block:
                    break
                bio.write(block)

            compressed = bio.getvalue()
            assert_array_equal(data, dataserializer.loads(compressed))

            # test write by chunks
            data_sink = bytearray(len(serialized))
            compressed_mv = memoryview(compressed)
            writer = ArrowBufferIO(pyarrow.py_buffer(data_sink), 'w')
            pos = 0
            while pos < len(compressed):
                endpos = min(pos + 128, len(compressed))
                writer.write(compressed_mv[pos:endpos])
                pos = endpos

            assert_array_equal(data, pyarrow.deserialize(data_sink))
Example 5
def create_data_writer(self, session_id, chunk_key, data_size, sender_ref,
                       ensure_cached=True, timeout=0, callback=None):
    from mars.compat import BytesIO
    query_key = (session_id, chunk_key)
    if query_key in self._data_metas and \
            self._data_metas[query_key].status in (ReceiveStatus.RECEIVED, ReceiveStatus.RECEIVING):
        self.tell_promise(callback, self.address, self._data_metas[query_key].status)
        return
    self._data_metas[query_key] = ReceiverDataMeta(chunk_size=data_size, status=ReceiveStatus.RECEIVING)
    self._data_writers[query_key] = BytesIO()
    self.tell_promise(callback, self.address, None)
Example 6
    def testDataSerialize(self):
        try:
            import numpy as np
            from numpy.testing import assert_array_equal
        except ImportError:
            np = None

        try:
            import scipy.sparse as sps
        except ImportError:
            sps = None

        if np:
            array = np.random.rand(1000, 100)
            assert_array_equal(
                array, dataserializer.loads(dataserializer.dumps(array)))
            assert_array_equal(
                array,
                dataserializer.loads(
                    dataserializer.dumps(
                        array, compress=dataserializer.CompressType.LZ4)))
            if not six.PY2:
                assert_array_equal(
                    array,
                    dataserializer.loads(
                        dataserializer.dumps(
                            array, compress=dataserializer.CompressType.GZIP)))

            array = np.random.rand(1000, 100)
            assert_array_equal(
                array,
                dataserializer.load(BytesIO(dataserializer.dumps(array))))
            assert_array_equal(
                array,
                dataserializer.load(
                    BytesIO(
                        dataserializer.dumps(
                            array, compress=dataserializer.CompressType.LZ4))))
            if not six.PY2:
                assert_array_equal(
                    array,
                    dataserializer.load(
                        BytesIO(
                            dataserializer.dumps(
                                array,
                                compress=dataserializer.CompressType.GZIP))))

            array = np.random.rand(1000, 100).T  # test non c-contiguous
            assert_array_equal(
                array, dataserializer.loads(dataserializer.dumps(array)))
            assert_array_equal(
                array,
                dataserializer.loads(
                    dataserializer.dumps(
                        array, compress=dataserializer.CompressType.LZ4)))
            if not six.PY2:
                assert_array_equal(
                    array,
                    dataserializer.loads(
                        dataserializer.dumps(
                            array, compress=dataserializer.CompressType.GZIP)))

            array = np.float64(0.2345)
            assert_array_equal(
                array, dataserializer.loads(dataserializer.dumps(array)))
            assert_array_equal(
                array,
                dataserializer.loads(
                    dataserializer.dumps(
                        array, compress=dataserializer.CompressType.LZ4)))
            if not six.PY2:
                assert_array_equal(
                    array,
                    dataserializer.loads(
                        dataserializer.dumps(
                            array, compress=dataserializer.CompressType.GZIP)))

            # test structured arrays.
            rec_dtype = np.dtype([('a', 'int64'), ('b', 'double'),
                                  ('c', '<U8')])
            array = np.ones((100, ), dtype=rec_dtype)
            array_loaded = dataserializer.loads(dataserializer.dumps(array))
            self.assertEqual(array.dtype, array_loaded.dtype)
            assert_array_equal(array, array_loaded)

            fn = os.path.join(tempfile.gettempdir(),
                              'test_dump_file_%d.bin' % id(self))
            try:
                array = np.random.rand(1000, 100).T  # test non c-contiguous
                with open(fn, 'wb') as dump_file:
                    dataserializer.dump(array, dump_file)
                with open(fn, 'rb') as dump_file:
                    assert_array_equal(array, dataserializer.load(dump_file))

                with open(fn, 'wb') as dump_file:
                    dataserializer.dump(
                        array,
                        dump_file,
                        compress=dataserializer.CompressType.LZ4)
                with open(fn, 'rb') as dump_file:
                    assert_array_equal(array, dataserializer.load(dump_file))

                if not six.PY2:
                    with open(fn, 'wb') as dump_file:
                        dataserializer.dump(
                            array,
                            dump_file,
                            compress=dataserializer.CompressType.GZIP)
                    with open(fn, 'rb') as dump_file:
                        assert_array_equal(array,
                                           dataserializer.load(dump_file))
            finally:
                if os.path.exists(fn):
                    os.unlink(fn)

        if sps:
            mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
            des_mat = dataserializer.loads(dataserializer.dumps(mat))
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

            des_mat = dataserializer.loads(
                dataserializer.dumps(mat,
                                     compress=dataserializer.CompressType.LZ4))
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

            if not six.PY2:
                des_mat = dataserializer.loads(
                    dataserializer.dumps(
                        mat, compress=dataserializer.CompressType.GZIP))
                self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

            vector = sparse.SparseVector(sps.csr_matrix(np.random.rand(2)),
                                         shape=(2, ))
            des_vector = dataserializer.loads(dataserializer.dumps(vector))
            self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0)

            des_vector = dataserializer.loads(
                dataserializer.dumps(vector,
                                     compress=dataserializer.CompressType.LZ4))
            self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0)

            if not six.PY2:
                des_vector = dataserializer.loads(
                    dataserializer.dumps(
                        vector, compress=dataserializer.CompressType.GZIP))
                self.assertTrue(
                    (vector.spmatrix != des_vector.spmatrix).nnz == 0)
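
A compact sketch of the file-based API exercised in this example. dump, load, CompressType and get_supported_compressions all appear in the test above; the import path and the file name are illustrative assumptions.

import numpy as np
from mars.serialize import dataserializer  # import path assumed; adjust to the project layout

data = np.random.rand(100, 10)

# fall back to no compression if LZ4 is not available in this build
codec = dataserializer.CompressType.LZ4
if codec not in dataserializer.get_supported_compressions():
    codec = dataserializer.CompressType.NONE

with open('dump_example.bin', 'wb') as f:  # file name is illustrative
    dataserializer.dump(data, f, compress=codec)
with open('dump_example.bin', 'rb') as f:
    restored = dataserializer.load(f)

assert np.array_equal(data, restored)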
Example 7
    def testFileBufferIO(self):
        if not np:
            return
        from numpy.testing import assert_array_equal

        compressions = [dataserializer.CompressType.NONE] + \
            list(dataserializer.get_supported_compressions())

        for c1 in compressions:
            for c2 in compressions:
                data = np.random.random((1000, 100))

                # test complete read
                compressed_read_file = BytesIO(
                    dataserializer.dumps(data, compress=c1))
                reader = FileBufferIO(compressed_read_file,
                                      'r',
                                      compress_out=c2)
                compressed = reader.read()
                self.assertEqual(
                    c2,
                    dataserializer.read_file_header(compressed).compress)
                assert_array_equal(data, dataserializer.loads(compressed))

                # test partial read
                compressed_read_file = BytesIO(
                    dataserializer.dumps(data, compress=c1))
                reader = FileBufferIO(compressed_read_file,
                                      'r',
                                      compress_out=c2)
                block = reader.read(128)
                data_left = reader.read()
                assert_array_equal(data,
                                   dataserializer.loads(block + data_left))

                # test read by chunks
                bio = BytesIO()
                compressed_read_file = BytesIO(
                    dataserializer.dumps(data, compress=c1))
                reader = FileBufferIO(compressed_read_file,
                                      'r',
                                      compress_out=c2)
                while True:
                    block = reader.read(128)
                    if not block:
                        break
                    bio.write(block)

                compressed = bio.getvalue()
                self.assertEqual(
                    c2,
                    dataserializer.read_file_header(compressed).compress)
                assert_array_equal(data, dataserializer.loads(compressed))

                # test write by chunks
                compressed_read_file.seek(0)
                compressed_write_file = BytesIO()
                writer = FileBufferIO(compressed_write_file,
                                      'w',
                                      compress_in=c2,
                                      managed=False)
                while True:
                    block = compressed_read_file.read(128)
                    if not block:
                        break
                    writer.write(block)
                writer.close()

                compressed = compressed_write_file.getvalue()
                self.assertEqual(
                    c2,
                    dataserializer.read_file_header(compressed).compress)
                assert_array_equal(data, dataserializer.loads(compressed))