Example #1
 def __init__(self):
     if Zstd is None:
         raise UnsupportedCompressionMethodError
     self._ctc = Zstd.ZstdCompressor()  # type: ignore
Example #2
def compress_stream_writer_size(chunks, zparams):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    for chunk in chunks:
        b = bio()
        with zctx.stream_writer(b, size=len(chunk)) as compressor:
            compressor.write(chunk)
Example #3
def compress(data, level):
    buffer = io.BytesIO()
    cctx = zstd.ZstdCompressor(level=level)
    with cctx.write_to(buffer) as compressor:
        compressor.write(data)
    return buffer.getvalue()
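`write_to()` is an older python-zstandard spelling of what current releases expose as `stream_writer()`. A minimal sketch of the equivalent helper on a recent version; flushing with `FLUSH_FRAME` finishes the frame without closing the BytesIO, so `getvalue()` still works:

import io

import zstandard as zstd


def compress_stream_writer(data, level):
    # Same round trip as above, using the newer stream_writer() API.
    cctx = zstd.ZstdCompressor(level=level)
    buffer = io.BytesIO()
    compressor = cctx.stream_writer(buffer)
    compressor.write(data)
    # End the zstd frame; the BytesIO stays open for getvalue().
    compressor.flush(zstd.FLUSH_FRAME)
    return buffer.getvalue()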
Example #4
            c = zlib.compress(chunk, args.zlib_level)
            compressed_discrete_zlib.append(c)
            ratios.append(float(len(c)) / float(len(chunk)))

        compressed_size = sum(map(len, compressed_discrete_zlib))
        ratio = float(compressed_size) / float(orig_size) * 100.0
        bad_count = sum(1 for r in ratios if r >= 1.00)
        good_ratio = 100.0 - (float(bad_count) / float(len(chunks)) * 100.0)
        print(
            "zlib discrete compressed size (l=%d): %d (%.2f%%); smaller: %.2f%%"
            % (args.zlib_level, compressed_size, ratio, good_ratio))

    # In discrete mode, each input is compressed independently, possibly
    # with a dictionary.
    if args.discrete:
        zctx = zstd.ZstdCompressor(compression_params=zparams)
        compressed_discrete = []
        ratios = []
        # Always use multiple threads here so we complete faster.
        if hasattr(zctx, "multi_compress_to_buffer"):
            for i, c in enumerate(
                    zctx.multi_compress_to_buffer(chunks, threads=-1)):
                compressed_discrete.append(c.tobytes())
                ratios.append(float(len(c)) / float(len(chunks[i])))
        else:
            for chunk in chunks:
                compressed = zctx.compress(chunk)
                compressed_discrete.append(compressed)
                ratios.append(float(len(compressed)) / float(len(chunk)))

        compressed_size = sum(map(len, compressed_discrete))
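The comment above notes that discrete mode may also use a dictionary, but the snippet only shows the plain path. A minimal sketch of the dictionary variant, assuming the sample chunks are numerous and large enough for `train_dictionary` to succeed:

import zstandard as zstd


def compress_discrete_with_dict(chunks, zparams, dict_size=16384):
    # Train a shared dictionary from the samples, then compress each chunk
    # independently against it.
    dict_data = zstd.train_dictionary(dict_size, chunks)
    zctx = zstd.ZstdCompressor(dict_data=dict_data, compression_params=zparams)
    return [zctx.compress(chunk) for chunk in chunks]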
Example #5
# Needs contextlib: the decorator turns this generator into a context manager
# that yields the open tar stream.
@contextlib.contextmanager
def open_tar_zst(path):
    cctx = zstandard.ZstdCompressor()
    with open(path, "wb") as f:
        with cctx.stream_writer(f) as compressor:
            with tarfile.open(mode="w|", fileobj=compressor) as tar:
                yield tar
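Because the function yields the tar object, it is meant to be driven as a context manager (hence the contextlib decorator above). A usage sketch with a hypothetical path:

# Hypothetical usage: stream a directory into a zstd-compressed tarball.
with open_tar_zst("backup.tar.zst") as tar:
    tar.add("data/", arcname="data")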
Example #6
 def test_compressobj_empty(self):
     cctx = zstd.ZstdCompressor(level=1)
     cobj = cctx.compressobj()
     self.assertEqual(cobj.compress(b''), b'')
     self.assertEqual(cobj.flush(), b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
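The nine flushed bytes are a complete zstd frame for empty input, so they round-trip through a decompressor. A quick check, as a minimal sketch:

import zstandard as zstd

cctx = zstd.ZstdCompressor(level=1)
cobj = cctx.compressobj()
cobj.compress(b"")
frame = cobj.flush()

# The flushed bytes form a valid frame that decompresses back to b"".
assert zstd.ZstdDecompressor().decompress(frame) == b""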
Example #7
 def test_empty_roundtrip(self):
     cctx = zstd.ZstdCompressor()
     empty = cctx.compress(b'')
     self.assertEqual(decompress_via_writer(empty), b'')
Example #8
def compress_content_dict_compress(chunks, zparams):
    zstd.ZstdCompressor(compression_params=zparams).compress(chunks[0])
    for i, chunk in enumerate(chunks[1:]):
        d = zstd.ZstdCompressionDict(chunks[i])
        zstd.ZstdCompressor(dict_data=d,
                            compression_params=zparams).compress(chunk)
Example #9
 def save(self, path):
     with open(path, "wb") as fh:
         cctx = zstandard.ZstdCompressor()
         with cctx.stream_writer(fh) as compressor:
             self.imls_log_daily = sparse.COO.from_numpy(self.imls_log_daily)
             compressor.write(pickle.dumps(self, protocol=4))
Example #10
    def test_simple(self):
        data = zstd.ZstdCompressor(level=1).compress(b'foobar')

        dctx = zstd.ZstdDecompressor()
        dobj = dctx.decompressobj()
        self.assertEqual(dobj.decompress(data), b'foobar')
Example #11
from enum import Enum, auto
import logging
import asyncio
import struct
import json
import zstandard as zstd

_compressor = zstd.ZstdCompressor()
_decompressor = zstd.ZstdDecompressor()
logger = logging.getLogger(__name__)


class MessageType(Enum):
    REQUEST_REGISTER = auto()
    REQUEST_PUBLISH = auto()
    REQUEST_FILE_LIST = auto()
    REQUEST_FILE_LOCATION = auto()
    REQUEST_CHUNK_REGISTER = auto()
    REPLY_REGISTER = auto()
    REPLY_FILE_LIST = auto()
    REPLY_PUBLISH = auto()
    REPLY_FILE_LOCATION = auto()
    PEER_REQUEST_CHUNK = auto()
    PEER_REPLY_CHUNK = auto()
    PEER_PING_PONG = auto()


def _message_log(message):
    log_message = {key: message[key] for key in message if key != 'data'}
    log_message['type'] = MessageType(message['type']).name
    return log_message
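The module only sets up the shared compressor/decompressor, the message types, and a logging helper; the wire format itself is not shown here. A hypothetical sketch of how such a module could frame messages over asyncio streams (the length-prefixed layout and the `write_message`/`read_message` names are assumptions, not part of the original):

import json
import struct

import zstandard as zstd

_compressor = zstd.ZstdCompressor()
_decompressor = zstd.ZstdDecompressor()


async def write_message(writer, message):
    # Hypothetical framing: 4-byte big-endian length, then the
    # zstd-compressed JSON body.
    body = _compressor.compress(json.dumps(message).encode("utf-8"))
    writer.write(struct.pack("!I", len(body)) + body)
    await writer.drain()


async def read_message(reader):
    (length,) = struct.unpack("!I", await reader.readexactly(4))
    body = await reader.readexactly(length)
    return json.loads(_decompressor.decompress(body))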
Example #12
    def start(self):
        '''
        Prepare for processing based on available input parameters. Assume source data is not available at this moment.
        Create output files and internal buffers
        '''

        # create part-file
        base_filename = self._init_params.image_filename.stem
        self._intermediate_file_name = os.path.join(
            self._init_params._output_directory,
            base_filename + '.rc' + str(self._input_params.reduction_level) +
            '_part' + '{0:03d}'.format(self._node_id))
        self._intermediate_file = open(self._intermediate_file_name, 'wb')

        # serialize ReCoDe header
        self._header.serialize_to(self._intermediate_file)

        # serialize source header
        # self._source.serialize_header(str(self._intermediate_file_name))

        # create validation file
        if self._init_params.validation_frame_gap > 0:
            self._validation_file_name = os.path.join(
                self._init_params._output_directory, base_filename + '_part' +
                '{0:03d}'.format(self._node_id) + '_validation_frames.bin')
            self._validation_file = open(self._validation_file_name, 'wb')

        # create buffer to hold reduced_compressed data
        # Best to ensure the buffer is large enough to hold the data this
        # thread is expected to process for a single chunk.
        self._buffer_sz = 1000
        self._rct_buffer = bytearray(self._buffer_sz)
        self._rct_buffer_fill_position = -1
        self._available_buffer_space = self._buffer_sz

        # self._bytes_per_pixel = np.dtype(get_dtype_string(self._header._rc_header["source_dtype"])).itemsize
        self._bytes_per_pixel = self._src_dtype.itemsize
        self._n_pixels_in_frame = self._header._rc_header[
            'ny'] * self._header._rc_header['nx']
        self._frame_sz = np.uint64(
            self._n_pixels_in_frame) * self._bytes_per_pixel
        self._frame_buffer = bytearray(self._buffer_sz)
        self._n_bytes_in_binary_image = math.ceil(self._n_pixels_in_frame / 8)

        if self._init_params.use_C:
            self._c_reader = c_recode.Reader()
            _max_sz = int(
                math.ceil((self._n_pixels_in_frame *
                           self._input_params.source_bit_depth * 1.0) / 8.0))
            self._pixvals = memoryview(
                bytearray(self._n_pixels_in_frame * self._bytes_per_pixel))
            self._packed_pixvals = memoryview(bytearray(_max_sz))

        self._chunk_offset = 0
        self._num_frames_in_part = 0

        # initialize validation counting parameters
        self._vc_roi['nx'] = min(self._header._rc_header['nx'], 128)
        self._vc_roi['ny'] = min(self._header._rc_header['ny'], 128)
        self._vc_roi['x_start'] = math.floor(
            (self._header._rc_header['nx'] - self._vc_roi['nx']) / 2.0)
        self._vc_roi['y_start'] = math.floor(
            (self._header._rc_header['ny'] - self._vc_roi['ny']) / 2.0)
        self._vc_n_pixels = self._vc_roi['nx'] * self._vc_roi['ny']

        if self._input_params.compression_scheme == 1:  #zstd
            self._compressor_context = zstd.ZstdCompressor(
                level=self._input_params.compression_level,
                write_content_size=False)
Example #13
if __name__ == '__main__':

    conn = psycopg2.connect(
        host="",
        port="",
        user="",
        password="",
        dbname="feed_archiver"
    )

    cur = conn.cursor()

    cur.execute("SELECT COUNT(*) FROM %s" % TABLE)
    row_count = cur.fetchone()[0]

    cur.execute("DECLARE cur1 CURSOR FOR SELECT * FROM %s" % TABLE)

    rows = pg_fetch_cursor_all(cur, name="cur1", batch_size=5000)

    with open("out_mp.ndjson.zst", "wb") as f:
        cctx = zstd.ZstdCompressor(level=19, threads=THREADS)
        with cctx.stream_writer(f) as compressor:
            for row in tqdm(rows, total=row_count, unit="row"):
                _id, archived_on, data = row
                data["_archived_on"] = int(archived_on.timestamp())
                compressor.write(orjson.dumps(data))
                compressor.write(b"\n")

    conn.close()
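Reading the resulting file back is the mirror image: wrap a `stream_reader` in a text wrapper and parse one JSON document per line. A minimal sketch, reusing the output filename from above:

import io

import orjson
import zstandard as zstd


def read_ndjson_zst(path="out_mp.ndjson.zst"):
    dctx = zstd.ZstdDecompressor()
    with open(path, "rb") as f:
        with dctx.stream_reader(f) as reader:
            # TextIOWrapper yields decompressed lines one at a time.
            for line in io.TextIOWrapper(reader, encoding="utf-8"):
                yield orjson.loads(line)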
Example #14
def compress_read_to_iter_size(chunks, zparams):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    for chunk in chunks:
        for d in zctx.read_to_iter(chunk, size=len(chunk)):
            pass
Example #15
 def create_zst_file(db_path, content=b'{"Hello": "World"}'):
     with open(db_path, "wb") as output_f:
         cctx = zstandard.ZstdCompressor()
         with cctx.stream_writer(output_f) as compressor:
             compressor.write(content)
Example #16
def compress_compressobj_size(chunks, zparams):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    for chunk in chunks:
        cobj = zctx.compressobj(size=len(chunk))
        cobj.compress(chunk)
        cobj.flush()
Example #17
import struct

import pyarrow
import scipy.sparse
import zstandard

# The magic initial bytes which tell us that a given binary chunk is
# Zstandard-compressed data.
ZSTD_MAGIC_NUMBER = struct.pack('<I', 0xFD2FB528)

compressor = zstandard.ZstdCompressor(level=16)
decompressor = zstandard.ZstdDecompressor()

context = pyarrow.SerializationContext()


def serialize_csc(matrix):
    """
    Decompose a matrix in Compressed Sparse Column format into more basic data
    types (tuples and numpy arrays) which PyArrow knows how to serialize
    """
    return ((matrix.data, matrix.indices, matrix.indptr), matrix.shape)


def deserialize_csc(args):
    """
    Reconstruct a Compressed Sparse Column matrix from its decomposed parts
    """
    return scipy.sparse.csc_matrix(*args)

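The magic-number constant suggests a guard of the form "only decompress buffers that actually look like zstd frames". A minimal sketch of that check, built on the module-level names above and assuming the compressed buffers were produced by `compressor.compress()` (so their frames carry the content size):

def maybe_decompress(blob: bytes) -> bytes:
    # Frames written by zstandard start with the little-endian magic number
    # 0xFD2FB528; anything else is passed through untouched.
    if blob.startswith(ZSTD_MAGIC_NUMBER):
        return decompressor.decompress(blob)
    return blob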
Example #18
def zstd_compress(path):
    cctx = zstandard.ZstdCompressor()
    with open(path, "rb") as input_f:
        with open(f"{path}.zst", "wb") as output_f:
            cctx.copy_stream(input_f, output_f)
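`ZstdDecompressor` exposes a symmetric `copy_stream`, so the inverse helper is a near mirror image. A minimal sketch; the `.zst` suffix handling mirrors the function above:

import zstandard


def zstd_decompress(path):
    # Reads "<path>.zst" and writes the decompressed bytes back to <path>.
    dctx = zstandard.ZstdDecompressor()
    with open(f"{path}.zst", "rb") as input_f:
        with open(path, "wb") as output_f:
            dctx.copy_stream(input_f, output_f)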
Example #19
def encode_zstd(content: bytes) -> bytes:
    zstd_ctx = zstd.ZstdCompressor()
    return zstd_ctx.compress(content)
Example #20
def polydata_list_to_json(polydata_list, manager=None):  # noqa: C901
    """Serialize a list of a Python object that represents vtk.js PolyData.

    The returned data is compatibile with vtk.js PolyData with compressed data
    buffers.
    """
    if polydata_list is None:
        return None
    else:
        compressor = zstd.ZstdCompressor(level=3)

        json = []
        for polydata in polydata_list:
            json_polydata = dict()
            for top_key, top_value in polydata.items():
                if isinstance(top_value, dict):
                    nested_value_copy = dict()
                    for nested_key, nested_value in top_value.items():
                        if not nested_key == 'values':
                            nested_value_copy[nested_key] = nested_value
                    json_polydata[top_key] = nested_value_copy
                else:
                    json_polydata[top_key] = top_value

            if 'points' in json_polydata:
                point_values = polydata['points']['values']
                compressed = compressor.compress(point_values.data)
                compressedView = memoryview(compressed)
                json_polydata['points']['compressedValues'] = compressedView

            for cell_type in ['verts', 'lines', 'polys', 'strips']:
                if cell_type in json_polydata:
                    values = polydata[cell_type]['values']
                    compressed = compressor.compress(values.data)
                    compressedView = memoryview(compressed)
                    json_polydata[cell_type][
                        'compressedValues'] = compressedView

            for data_type in ['pointData', 'cellData']:
                if data_type in json_polydata:
                    data = polydata[data_type]
                    compressed_data = dict()
                    for nested_key, nested_value in data.items():
                        if not nested_key == 'arrays':
                            compressed_data[nested_key] = nested_value
                    compressed_arrays = []
                    for array in polydata[data_type]['arrays']:
                        compressed_array = dict()
                        for nested_key, nested_value in array['data'].items():
                            if not nested_key == 'values':
                                compressed_array[nested_key] = nested_value
                        values = array['data']['values']
                        compressed = compressor.compress(values.data)
                        compressedView = memoryview(compressed)
                        compressed_array['compressedValues'] = compressedView
                        compressed_arrays.append({'data': compressed_array})
                    compressed_data['arrays'] = compressed_arrays
                    json_polydata[data_type] = compressed_data

            json.append(json_polydata)
        return json
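On the consuming side, each `compressedValues` buffer can be turned back into a typed array, provided the element dtype is known from the array's metadata. A minimal sketch (the dtype argument is an assumption; `compress()` stored the content size, so a plain `decompress()` suffices):

import numpy as np
import zstandard as zstd

decompressor = zstd.ZstdDecompressor()


def decompress_values(compressed_values, dtype):
    # Recover the raw bytes, then reinterpret them with the dtype recorded
    # in the accompanying vtk.js metadata.
    raw = decompressor.decompress(bytes(compressed_values))
    return np.frombuffer(raw, dtype=dtype)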
Example #21
 def zstd_compress(body):
     c = zstd.ZstdCompressor()
     return c.compress(body)
Example #22
    def test_level_bounds(self):
        with self.assertRaises(ValueError):
            zstd.ZstdCompressor(level=0)

        with self.assertRaises(ValueError):
            zstd.ZstdCompressor(level=23)
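The upper bound being probed is exposed by the library itself as `zstd.MAX_COMPRESSION_LEVEL` (22 for current zstd builds); the lower-bound behaviour has changed over time, as newer releases accept level 0 and negative levels, so the level=0 assertion is specific to the version this test targets. A minimal sketch against the constant:

import zstandard as zstd

# The highest level the bindings accept; anything above it raises ValueError.
print(zstd.MAX_COMPRESSION_LEVEL)  # 22 on current zstd builds
cctx = zstd.ZstdCompressor(level=zstd.MAX_COMPRESSION_LEVEL)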
Example #23
def compress_multi_compress_to_buffer_list(chunks, zparams, threads):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    zctx.multi_compress_to_buffer(chunks, threads=threads)
Example #24
    "needsdiagnosis",
    "regression",
    "stepstoreproduce",
    "spambug",
    "testlabelselect",
    "testgroupselect",
]

DEFAULT_EXPIRATION_TTL = 7 * 24 * 3600  # A week
redis = Redis.from_url(os.environ.get("REDIS_URL", "redis://localhost/0"))

MODEL_CACHE: ReadthroughTTLCache[str, Model] = ReadthroughTTLCache(
    timedelta(hours=1), lambda m: Model.load(f"{m}model"))
MODEL_CACHE.start_ttl_thread()

cctx = zstandard.ZstdCompressor(level=10)


def setkey(key: str, value: bytes, compress: bool = False) -> None:
    LOGGER.debug(f"Storing data at {key}: {value!r}")
    if compress:
        value = cctx.compress(value)
    redis.set(key, value)
    redis.expire(key, DEFAULT_EXPIRATION_TTL)


def classify_bug(model_name: str, bug_ids: Sequence[int],
                 bugzilla_token: str) -> str:
    from bugbug_http.app import JobInfo

    # This should be called in a process worker so it should be safe to set
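`setkey` compresses transparently, so the read path needs a matching decompression step. A hypothetical counterpart sketch (the `getkey` name and the `compressed` flag are assumptions; it reuses the module-level `redis` client from above):

from typing import Optional

dctx = zstandard.ZstdDecompressor()


def getkey(key: str, compressed: bool = False) -> Optional[bytes]:
    # Hypothetical mirror of setkey(): fetch the value and, if it was stored
    # compressed, decompress it with a shared decompression context.
    value = redis.get(key)
    if value is not None and compressed:
        value = dctx.decompress(value)
    return value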
Example #25
def compress_stream_reader(chunks, zparams):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    for chunk in chunks:
        with zctx.stream_reader(chunk) as reader:
            while reader.read(16384):
                pass
Example #26
def compress_one_use(chunks, zparams):
    for chunk in chunks:
        zctx = zstd.ZstdCompressor(compression_params=zparams)
        zctx.compress(chunk)
Example #27
def solidCompress(filePath, compressionLevel = 18, outputDir = None, threads = -1):

	ncaHeaderSize = 0x4000
	
	filePath = os.path.abspath(filePath)
	container = Fs.factory(filePath)
	container.open(filePath, 'rb')
	
	CHUNK_SZ = 0x1000000
	
	if outputDir is None:
		nszPath = filePath[0:-1] + 'z'
	else:
		nszPath = os.path.join(outputDir, os.path.basename(filePath[0:-1] + 'z'))
	
	nszPath = os.path.abspath(nszPath)
	nszFilename = os.path.basename(nszPath)
	
	# Getting title ID to check for NSZ file in the output directory
	# We should still keep this part of title ID comparison because not all files have titleID in
	# filename.
	titleId = ''
	for nspf in container:
		if isinstance(nspf, Fs.Ticket.Ticket):
			nspf.getRightsId()
			titleId = nspf.titleId()
			break # No need to go for other objects

	Print.info('compressing (level %d) %s -> %s' % (compressionLevel, filePath, nszPath))
	
	newNsp = Fs.Pfs0.Pfs0Stream(nszPath)
	
	try:

		for nspf in container:

			if isinstance(nspf, Fs.Nca.Nca) and nspf.header.contentType == Fs.Type.Content.DATA:
				Print.info('skipping delta fragment')
				continue
				
			if isinstance(nspf, Fs.Nca.Nca) and (nspf.header.contentType == Fs.Type.Content.PROGRAM or nspf.header.contentType == Fs.Type.Content.PUBLICDATA):
				if SectionFs.isNcaPacked(nspf, ncaHeaderSize):
					
					newFileName = nspf._path[0:-1] + 'z'
					
					f = newNsp.add(newFileName, nspf.size)
					
					start = f.tell()
					
					nspf.seek(0)
					f.write(nspf.read(ncaHeaderSize))
					
					sections = []
					for fs in SectionFs.sortedFs(nspf):
						sections += fs.getEncryptionSections()
					
					if len(sections) == 0:
						raise Exception("NCA can't be decrypted. Outdated keys.txt?")
					
					header = b'NCZSECTN'
					header += len(sections).to_bytes(8, 'little')
					
					i = 0
					for fs in sections:
						i += 1
						header += fs.offset.to_bytes(8, 'little')
						header += fs.size.to_bytes(8, 'little')
						header += fs.cryptoType.to_bytes(8, 'little')
						header += b'\x00' * 8
						header += fs.cryptoKey
						header += fs.cryptoCounter
						
					f.write(header)
					
					blockID = 0
					chunkRelativeBlockID = 0
					startChunkBlockID = 0
					blocksHeaderFilePos = f.tell()
					compressedblockSizeList = []
					
					decompressedBytes = ncaHeaderSize
					
					with tqdm(total=nspf.size, unit_scale=True, unit="B") as bar:
						
						partitions = []
						for section in sections:
							#print('offset: %x\t\tsize: %x\t\ttype: %d\t\tiv%s' % (section.offset, section.size, section.cryptoType, str(hx(section.cryptoCounter))))
							partitions.append(nspf.partition(offset = section.offset, size = section.size, n = None, cryptoType = section.cryptoType, cryptoKey = section.cryptoKey, cryptoCounter = bytearray(section.cryptoCounter), autoOpen = True))
							
						
						partNr = 0
						bar.update(f.tell())
						if threads > 1:
							cctx = zstandard.ZstdCompressor(level=compressionLevel, threads=threads)
						else:
							cctx = zstandard.ZstdCompressor(level=compressionLevel)
						compressor = cctx.stream_writer(f)
						while True:
						
							buffer = partitions[partNr].read(CHUNK_SZ)
							while (len(buffer) < CHUNK_SZ and partNr < len(partitions)-1):
								partitions[partNr].close()
								partitions[partNr] = None
								partNr += 1
								buffer += partitions[partNr].read(CHUNK_SZ - len(buffer))
							if len(buffer) == 0:
								break
							compressor.write(buffer)
							
							decompressedBytes += len(buffer)
							bar.update(len(buffer))
						partitions[partNr].close()
						partitions[partNr] = None
					
					compressor.flush(zstandard.FLUSH_FRAME)
					
					written = f.tell() - start
					print('compressed %d%% %d -> %d  - %s' % (int(written * 100 / nspf.size), decompressedBytes, written, nspf._path))
					newNsp.resize(newFileName, written)
					continue
				else:
					print('not packed!')

			f = newNsp.add(nspf._path, nspf.size)
			nspf.seek(0)
			while not nspf.eof():
				buffer = nspf.read(CHUNK_SZ)
				f.write(buffer)
		
	except KeyboardInterrupt:
		os.remove(nszPath)
		raise KeyboardInterrupt

	except BaseException as e:
		Print.error(traceback.format_exc())
		os.remove(nszPath)
	finally:
		newNsp.close()
		container.close()
		
	return nszPath