def tar_lzma_generator(tree, dest, root, subdir, force_mtime=None,
                       compression_format="alone"):
    """Yield the LZMA-compressed chunks of a tarball built from `tree`.

    The uncompressed tar stream is produced by ``tarball_generator(tree,
    root, subdir, force_mtime=force_mtime)``.  Every chunk is pushed through
    one LZMA compressor, and the compressor is flushed once the tar stream
    is exhausted.

    `dest` is accepted but not used in this function; writing the yielded
    chunks somewhere is left to the caller.

    :param compression_format: name of the container format.  On Python 2
        it is handed to the compressor as-is; otherwise it must be one of
        "xz", "raw" or "alone".
    :raises errors.DependencyNotPresent: if the lzma module cannot be
        imported.
    """
    try:
        import lzma
    except ImportError as err:
        raise errors.DependencyNotPresent('lzma', err)

    if sys.version_info[0] != 2:
        # This API selects the format through a module constant.
        format_ids = {
            'xz': lzma.FORMAT_XZ,
            'raw': lzma.FORMAT_RAW,
            'alone': lzma.FORMAT_ALONE,
        }
        encoder = lzma.LZMACompressor(format=format_ids[compression_format])
    else:
        # Python 2: the format name travels inside an options dict.
        encoder = lzma.LZMACompressor(
            options={"format": compression_format})

    tar_chunks = tarball_generator(
        tree, root, subdir, force_mtime=force_mtime)
    for piece in tar_chunks:
        yield encoder.compress(piece)
    yield encoder.flush()
def compress(method, data_in):
    """Compress `data_in` with one of a fixed set of methods.

    Args:
        method: 'gzip', 'bz2', 'xz' or 'none'.  'none' returns `data_in`
            unchanged.
        data_in: the bytes to compress.

    Returns:
        The compressed bytes (or `data_in` itself for 'none').

    Raises:
        ValueError: if `method` is not one of the supported names.
    """
    if method == 'none':
        return data_in
    # The codec modules are imported lazily so that an unavailable codec
    # only matters when it is actually requested.
    if method == 'gzip':
        import gzip
        return gzip.compress(data_in)
    if method == 'bz2':
        import bz2
        return bz2.compress(data_in)
    if method == 'xz':
        import lzma
        return lzma.compress(data_in, format=lzma.FORMAT_XZ)
    raise ValueError('Invalid compression method: {!r}'.format(method))
def _lzma_compress(raw_str, complevel=9):
    """Compress `raw_str` into a single legacy ".lzma" (alone) stream.

    The encoder settings are fixed (lc=3, lp=0, pb=2, bt4 match finder,
    nice_len=273, 64 MiB dictionary); only the level is configurable.

    The ``options=`` keyword is tried first.  The standard-library lzma
    module does not accept that keyword and raises TypeError, in which case
    the same settings are expressed as an LZMA1 filter specification.

    :param raw_str: the bytes to compress.
    :param complevel: compression level / preset (default 9).
    :return: the complete compressed stream.
    """
    try:
        zipper = lzma.LZMACompressor(options={
            'format': 'alone',
            'level': complevel,
            'extreme': False,
            'lc': 3,
            'lp': 0,
            'pb': 2,
            'mode': 2,
            'mf': 'bt4',
            'nice_len': 273,
            'depth': 0,
            'dict_size': 67108864,
        })
    except TypeError:
        if not hasattr(lzma, 'FORMAT_ALONE'):
            # Not the stdlib-style API either: surface the original error.
            raise
        zipper = lzma.LZMACompressor(
            format=lzma.FORMAT_ALONE,
            filters=[{
                'id': lzma.FILTER_LZMA1,
                'preset': complevel,
                'lc': 3,
                'lp': 0,
                'pb': 2,
                'mode': lzma.MODE_NORMAL,  # numeric value 2, as above
                'mf': lzma.MF_BT4,
                'nice_len': 273,
                'depth': 0,
                'dict_size': 67108864,
            }])
    return zipper.compress(raw_str) + zipper.flush()
def _init(self):
    """Create the raw LZMA compressor and return its serialized header.

    The filter options in ``self.compress_options`` are encoded to their
    binary properties, decoded back into a filter specification, and that
    specification configures ``self._comp``.

    :return: ``struct.pack("<BBH", 9, 4, len(props))`` followed by the
        encoded properties.
    """
    encoded = lzma._encode_filter_properties(self.compress_options)
    filter_spec = lzma._decode_filter_properties(
        self.compress_options["id"], encoded)
    self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=(filter_spec,))
    header = struct.pack("<BBH", 9, 4, len(encoded))
    return header + encoded
def write_data(self, package_dir, out):
    """Write every Python source file of a package to `out` as text.

    For each ``(path, shortpath)`` pair from ``self.get_package_files``
    that is an ``__init__.py`` or another ``.py`` file, this emits::

        <repr of dotted module name>:(<is-package flag>,<chunk reprs>),

    Each chunk comes from ``self.incremental_read`` and, when
    ``self.compress`` is set, goes through a per-file LZMA compressor whose
    flushed tail is written last.  Without compression an empty bytes
    literal is written in place of the tail.  Other files are skipped.
    """
    bar = tqdm(self.get_package_files(package_dir), desc=' '*50)
    for path, shortpath in bar:
        if self.compress:
            # One compressor per file so each entry is a separate stream.
            encoder = lzma.LZMACompressor()

        if shortpath.name == '__init__.py':
            module_path = shortpath.parent
        elif shortpath.suffix == '.py':
            module_path = shortpath.with_suffix('')
        else:
            continue

        bar.set_description('{: <50}'.format(str(shortpath)))
        with path.open('rb') as fp:
            dotted_name = str(os.path.normpath(module_path)).replace('/', '.')
            out.write(repr(dotted_name))
            out.write(':(')
            out.write(str(shortpath.name == '__init__.py'))
            out.write(',')
            for part in self.incremental_read(fp):
                if self.compress:
                    part = encoder.compress(part)
                if not part:
                    # Nothing to emit for this chunk.
                    continue
                out.write(repr(part))
                out.write('\\\n')
            if self.compress:
                out.write(repr(encoder.flush()))
            else:
                out.write('b""')
            out.write('),')
def get(cls):
    """Return a ``(tag, compressor)`` pair for the first supported codec.

    ``support['lzma']`` is checked first, then ``support['bz2']``; zlib is
    the fallback when neither is flagged as supported.
    """
    # Factories are lambdas so a codec module is only touched when its
    # support flag is set.
    candidates = (
        ('lzma', b'LZ00', lambda: lzma.LZMACompressor()),
        ('bz2', b'BZ00', lambda: bz2.BZ2Compressor()),
    )
    for codec, tag, make in candidates:
        if support[codec]:
            return tag, make()
    return b'ZL00', zlib.compressobj()
def pghoard(db, tmpdir, request):  # pylint: disable=redefined-outer-name
    """Yield a started PGHoard instance configured under `tmpdir`.

    The backup site is named after the requesting test function.  A JSON
    config is written to ``<tmpdir>/config.json``, the alert and backup
    spool directories are created, and the instance's threads are started.
    ``pgh.quit()`` is called when the generator is resumed after the yield.
    """
    test_site = request.function.__name__
    base_dir = str(tmpdir)
    use_walreceiver = os.environ.get("pghoard_test_walreceiver")
    active_backup_mode = "walreceiver" if use_walreceiver else "pg_receivexlog"

    site_config = {
        "active_backup_mode": active_backup_mode,
        "basebackup_count": 2,
        "basebackup_interval_hours": 24,
        "pg_bin_directory": db.pgbin,
        "pg_data_directory": db.pgdata,
        "nodes": [db.user],
        "object_storage": {
            "storage_type": "local",
            "directory": os.path.join(base_dir, "backups"),
        },
    }
    config = {
        "alert_file_dir": os.path.join(base_dir, "alerts"),
        "backup_location": os.path.join(base_dir, "backupspool"),
        "backup_sites": {test_site: site_config},
        "compression": {
            "algorithm": "snappy" if snappy else "lzma",
        },
        "http_address": "127.0.0.1",
        "http_port": random.randint(1024, 32000),
        "json_state_file_path": tmpdir.join("pghoard_state.json").strpath,
        "maintenance_mode_file": tmpdir.join("maintenance_mode_file").strpath,
    }

    confpath = os.path.join(base_dir, "config.json")
    with open(confpath, "w") as fp:
        json.dump(config, fp)

    # Create the directory layout before the instance is constructed.
    backup_site_path = os.path.join(config["backup_location"], test_site)
    os.makedirs(config["alert_file_dir"])
    for leaf in ("basebackup", "xlog", "timeline"):
        os.makedirs(os.path.join(backup_site_path, leaf))

    pgh = PGHoard(confpath)
    pgh.test_site = test_site
    pgh.start_threads_on_startup()
    if snappy:
        pgh.Compressor = snappy.StreamCompressor
    else:
        pgh.Compressor = lambda: lzma.LZMACompressor(preset=0)  # pylint: disable=redefined-variable-type

    time.sleep(0.05)  # Hack to give the server time to start up
    yield pgh
    pgh.quit()
def init(self):
    """Reset the position and create the LZMA codec matching ``self.mode``.

    Mode "r" gets a decompressor plus an empty bytes buffer; any other mode
    gets a compressor and no buffer.
    """
    self.pos = 0
    if self.mode != "r":
        self.lzmaobj = lzma.LZMACompressor()
        return
    self.lzmaobj = lzma.LZMADecompressor()
    self.buf = b""
def test_compression_stream_reset(self):
    """Check that a compressor produces the expected xz stream after reset().

    Some input is compressed and thrown away first; after ``reset`` the
    whole of ``self.plain`` is compressed in two steps and compared with
    ``self.compressed_stream_xz``.
    """
    encoder = lzma.LZMACompressor(options={'format': 'xz'})
    # This output is deliberately discarded: it precedes the reset.
    encoder.compress(self.plain[:10])
    encoder.reset(options={'format': 'xz'})

    stream = encoder.compress(self.plain[:15])
    stream = stream + encoder.compress(self.plain[15:])
    stream = stream + encoder.flush()
    self.assertEqual(stream, self.compressed_stream_xz)
def comp_ratio(test_string):
    """Return the LZMA compression ratio of `test_string`.

    The ratio is ``len(compressed) / len(utf8_bytes)``, where the
    compressed size is that of the complete stream.

    :param test_string: the text to measure.
    :return: compressed size divided by UTF-8 encoded size.
    :raises ZeroDivisionError: if `test_string` is empty.
    """
    # If we are agnostic about what the symbols are, and we just observe the
    # relative frequency of each symbol, the distribution of frequencies
    # makes some texts harder to compress even if we don't know what the
    # symbols mean.
    # http://www.beamreach.org/data/101/Science/processing/Nora/Papers/Information%20entropy%20o%20fjumpback%20whale%20songs.pdf
    c = lzma.LZMACompressor()
    bytes_in = bytes(test_string, "utf-8")
    # compress() may hold input back internally; the stream is only
    # complete once flush() has been called, so both parts are counted.
    bytes_out = c.compress(bytes_in) + c.flush()
    return len(bytes_out) / len(bytes_in)
def processMap(task):
    """Rasterize the continents shapefile and save it as PNG and LZMA.

    The layer is burned into an in-memory single-band byte raster at 0.1
    units per pixel using the ``FID`` attribute, copied to a PNG file, and
    the flattened raster array is written LZMA-compressed (FORMAT_ALONE) to
    a second file.  Any exception is logged through ``QgsMessageLog``
    instead of being raised.

    :return: the ``datetime`` captured when the function started.
    """
    time_start = datetime.now()
    try:
        # Continent name -> id table; not referenced by the code below.
        conti_mapping = {"Africa" : 1, "Asia" : 2, "Europe" : 8, "Oceania" : 5, "South America" : 6, "Australia" : 3, "North America" : 4}
        lzma_path = "C:/Users/david/Documents/GitHub/terracustomtreerepo/treemap/continents_map.lzma"
        png_path = "C:/Users/david/Documents/GitHub/terracustomtreerepo/treemap/continents_map.png"
        shape_ds = ogr.Open("C:/Users/david/Documents/GitHub/terracustomtreerepo/project_resources/continents/continents.shp")

        pixel_size = 0.1
        source_layer = shape_ds.GetLayer()
        source_srs = source_layer.GetSpatialRef()
        x_min, x_max, y_min, y_max = source_layer.GetExtent()
        width = int((x_max - x_min) / pixel_size)
        height = int((y_max - y_min) / pixel_size)

        # Build the target raster in memory.
        raster = gdal.GetDriverByName("MEM").Create('', width, height, 1, gdal.GDT_Byte)
        raster.SetGeoTransform((x_min, pixel_size, 0, y_max, 0, -pixel_size))
        band = raster.GetRasterBand(1)
        band.SetNoDataValue(0)
        gdal.RasterizeLayer(raster, [1], source_layer, options=['ATTRIBUTE=FID'])
        gdal.GetDriverByName('PNG').CreateCopy(png_path, raster)

        conti_map = raster.GetRasterBand(1).ReadAsArray()
        # Drop the reference to the dataset now that the array is read.
        raster = None

        encoder = lzma.LZMACompressor(format=lzma.FORMAT_ALONE)
        flat_map = conti_map.flatten()
        with open(lzma_path, 'wb') as sink:
            sink.write(encoder.compress(flat_map) + encoder.flush())
    except Exception as err:
        QgsMessageLog.logMessage(
            'Error: {error}'.format(error=str(err)),
            CATEGORY, Qgis.Info)
    return time_start
def lzma_compress(data):
    """Compress `data` into one complete LZMA stream.

    The compressor is configured with the module-level ``LZMA_OPTIONS``
    plus a CRC32 integrity check and ``lzma.PRESET_EXTREME`` as preset.
    """
    encoder = lzma.LZMACompressor(
        check=lzma.CHECK_CRC32,
        preset=lzma.PRESET_EXTREME,
        **LZMA_OPTIONS
    )
    return encoder.compress(data) + encoder.flush()
def test_compression_stream(self):
    """Compress ``self.plain`` in a single step, as 'alone' and then 'xz'.

    The same compressor object is reused: it is reset to the 'xz' format
    after the 'alone' stream has been checked.
    """
    encoder = lzma.LZMACompressor(options={'format': 'alone'})
    alone_stream = encoder.compress(self.plain) + encoder.flush()
    self.assertEqual(alone_stream, self.compressed_stream_alone)

    encoder.reset(options={'format': 'xz'})
    xz_stream = encoder.compress(self.plain) + encoder.flush()
    self.assertEqual(xz_stream, self.compressed_stream_xz)
def lzma_compress(self, data):
    """
    Compress ``data`` into one complete LZMA stream with a CRC32 check.

    :param data: the bytes to compress
    :return: the compressed bytes
    """
    return lzma.compress(data, check=lzma.CHECK_CRC32)
def __init__(self, src_fp, algorithm, level=0):
    """Set up the base stream and a compressor for `algorithm`.

    :param src_fp: source object handed to the base class initializer.
    :param algorithm: "lzma", "snappy" or "zstd".
    :param level: used as the lzma preset or the zstd level; not used for
        snappy.
    :raises InvalidConfigurationError: for any other algorithm name.
    """
    super().__init__(src_fp, minimum_read_size=32 * 1024)
    if algorithm == "lzma":
        compressor = lzma.LZMACompressor(
            format=lzma.FORMAT_XZ, check=-1, preset=level, filters=None)
    elif algorithm == "snappy":
        compressor = snappy.StreamCompressor()
    elif algorithm == "zstd":
        compressor = zstd.ZstdCompressor(level=level).compressobj()
    else:
        raise InvalidConfigurationError("invalid compression algorithm: {!r}".format(algorithm))
    self._compressor = compressor
def compressLZMA(data, archive_name):
    """Write `data`, LZMA-compressed, to the file `archive_name`.

    The format and level come from the module-level ``compressformat`` and
    ``compresslevel`` settings.

    :param data: the uncompressed bytes to store.
    :param archive_name: path of the archive to create.
    """
    options = {"format": compressformat, "level": compresslevel}
    fileobj = lzma.LZMAFile(archive_name, "w", options=options)
    try:
        # LZMAFile compresses whatever is written to it, so it must be given
        # the raw data.  Feeding it the (unflushed) output of a separate
        # compressor would store a truncated, doubly compressed stream.
        fileobj.write(data)
    finally:
        # Close even when the write fails so the handle is not leaked.
        fileobj.close()
def __init__(self, filters=None):
    """Create a raw LZMA compressor and describe its filter chain.

    :param filters: list of lzma filter specifications; when None, a single
        LZMA2 filter with preset ``7 | lzma.PRESET_EXTREME`` is used.

    ``self.coders`` receives one entry per filter holding the method looked
    up in ``self.lzma_methods_map_r``, the encoded filter properties, and
    stream counts of 1.
    """
    if filters is None:
        filters = [{"id": lzma.FILTER_LZMA2, "preset": 7 | lzma.PRESET_EXTREME}]
    self.filters = filters
    self.compressor = lzma.LZMACompressor(format=lzma.FORMAT_RAW, filters=self.filters)
    self.coders = []
    for spec in self.filters:
        self.coders.append(dict(
            method=self.lzma_methods_map_r[spec['id']],
            properties=lzma._encode_filter_properties(spec),
            numinstreams=1,
            numoutstreams=1,
        ))
def init(self):
    """Reset the position and create the LZMA codec matching ``self.mode``.

    Mode "r" gets a decompressor plus an empty read buffer; any other mode
    gets a compressor and no buffer.
    """
    import lzma

    self.pos = 0
    if self.mode == "r":
        self.lzmaobj = lzma.LZMADecompressor()
        # Seeking here can cause problems with Python 2.7
        # if hasattr(self.fileobj, "seek"):
        #     self.fileobj.seek(0)
        # The buffer collects decompressed output, which is bytes; b"" is
        # the same object type as "" on Python 2 and stays correct on 3.
        self.buf = b""
    else:
        self.lzmaobj = lzma.LZMACompressor()
def compressor(self, compression_algorithm):
    """Return a new streaming compressor for `compression_algorithm`.

    :param compression_algorithm: "lzma" (preset 0) or "snappy".
    :raises MissingLibraryError: "snappy" was requested but the snappy
        module is not available.
    :raises InvalidConfigurationError: any other algorithm name.
    """
    if compression_algorithm == "lzma":
        return lzma.LZMACompressor(preset=0)
    if compression_algorithm != "snappy":
        raise InvalidConfigurationError(
            "invalid compression algorithm: {!r}".format(
                compression_algorithm))
    if not snappy:
        raise MissingLibraryError(
            "python-snappy is required when using snappy compression")
    return snappy.StreamCompressor()
def test_compression_stream_props(self):
    """Compress ``self.plain`` split after 5 bytes, as 'alone' then 'xz'.

    The first five bytes are fed in a separate step from the rest; the
    joined output must equal the stored reference stream for each format.
    """
    head, tail = self.plain[:5], self.plain[5:]

    encoder = lzma.LZMACompressor(options={'format': 'alone'})
    alone_stream = encoder.compress(head) + encoder.compress(tail) + encoder.flush()
    self.assertEqual(alone_stream, self.compressed_stream_alone)

    encoder.reset(options={'format': 'xz'})
    xz_stream = encoder.compress(head) + encoder.compress(tail) + encoder.flush()
    self.assertEqual(xz_stream, self.compressed_stream_xz)
def test_compression_stream_two(self):
    """Compress ``self.plain`` in two steps (split at 10), 'alone' then 'xz'."""
    encoder = lzma.LZMACompressor(options={'format': 'alone'})

    def two_step():
        # Feed the first ten bytes, then the remainder, then finish.
        first = encoder.compress(self.plain[:10])
        second = encoder.compress(self.plain[10:])
        return first + second + encoder.flush()

    self.assertEqual(two_step(), self.compressed_stream_alone)
    encoder.reset(options={'format': 'xz'})
    self.assertEqual(two_step(), self.compressed_stream_xz)
def lzma_demo():
    """Demonstrate incremental LZMA compression and file compression.

    First compresses three byte strings through one compressor and prints
    the finished stream, then copies ``file.txt`` from the current
    directory into ``file.txt.xz``.
    """
    # Incremental compression
    lzmacom = lzma.LZMACompressor()
    data = lzmacom.compress(b'luzhuo')
    data += lzmacom.compress(b'.')
    data += lzmacom.compress(b'me')
    # The stream is only complete (and decompressible) after flush().
    data += lzmacom.flush()
    print("增量压缩: ", data)

    # Reading and writing
    with open('file.txt', 'rb') as read, lzma.open('file.txt.xz', 'wb') as write:
        shutil.copyfileobj(read, write)
def compress(algorithm: CompressionAlgorithm, data: bytes) -> bytes:
    """Compress `data` with the codec selected by `algorithm`.

    ``NONE`` returns `data` unchanged; ``LZMA``, ``ZLIB`` and ``BZ2`` each
    return one complete compressed stream.

    :raises ValueError: if `algorithm` is none of the members above.
    """
    if algorithm is CompressionAlgorithm.NONE:
        return data
    if algorithm is CompressionAlgorithm.LZMA:
        # One-shot helper: compresses and flushes with default settings.
        return lzma.compress(data)
    if algorithm is CompressionAlgorithm.ZLIB:
        return zlib.compress(data)
    if algorithm is CompressionAlgorithm.BZ2:
        return bz2.compress(data)
    raise ValueError
def compress(data: bytes) -> bytes:
    """Compress `data` as a raw LZMA1 stream behind a 5-byte header.

    The header ``5D 00 00 08 00`` is the LZMA properties byte for lc=3,
    lp=0, pb=2 followed by the little-endian dictionary size 524288, i.e.
    the same settings the encoder below is created with.

    :param data: the bytes to compress.
    :return: the header followed by the complete compressed stream.
    """
    ec = lzma.LZMACompressor(
        format=lzma.FORMAT_RAW,
        filters=[{
            "id": lzma.FILTER_LZMA1,
            "dict_size": 524288,
            "lc": 3,
            "lp": 0,
            "pb": 2,
        }],
    )
    # compress() can already return part of the stream (it does for larger
    # inputs); that output must be kept, not only what flush() returns.
    body = ec.compress(data) + ec.flush()
    return b"]\x00\x00\x08\x00" + body
def compress_sequences(seqs, node_feature_mapping, edge_feature_mapping):
    """Return the total LZMA-compressed size, in bytes, of all sequences.

    Each sequence is converted with ``sequence_to_integers`` and its
    ``tobytes()`` output is fed to one shared compressor (preset 9 with the
    extreme flag); the size of the final flush is included in the total.
    """
    compressor = lzma.LZMACompressor(preset=9 | lzma.PRESET_EXTREME)

    def emitted_size(seq):
        integers = sequence_to_integers(seq, node_feature_mapping, edge_feature_mapping)
        return len(compressor.compress(integers.tobytes()))

    streamed = sum(emitted_size(seq) for seq in seqs)
    return streamed + len(compressor.flush())
def compress(inputString):
    """Return `inputString`, UTF-8 encoded, as one LZMA-compressed stream.

    The encoded text is fed to the compressor in pieces of at most 8192
    bytes and the compressor is flushed at the end.
    """
    raw = inputString.encode("utf-8")
    encoder = lzma.LZMACompressor()
    step = 8192
    pieces = [encoder.compress(raw[start:start + step])
              for start in range(0, len(raw), step)]
    pieces.append(encoder.flush())
    return b"".join(pieces)
def compress(self, data):
    """
    Create a lzma compressor and compress some data.

    After calling flush the compressor can't be used again. Hence, a new
    compressor is created for each use.

    :param data: the payload; must be a ``bytes`` object.
    :return: data as a bytes object.
    :raises TypeError: if ``data`` is not ``bytes``.
    """
    if not isinstance(data, bytes):
        # TypeError is the specific error for a wrong argument type and is
        # still caught by handlers written for Exception.
        raise TypeError(
            f"Can only compress bytes, got {type(data)}")
    compressor = lzma.LZMACompressor()
    return compressor.compress(data) + compressor.flush()
def test_compressionDecompressionRoundtrip(self):
    """Compressing and then decompressing must return the original bytes."""
    import lzma

    # Input data
    originalData = "something...".encode()

    # Compress into one complete stream
    encoder = lzma.LZMACompressor()
    compressedData = encoder.compress(originalData) + encoder.flush()

    # Decompress it again with a fresh decompressor
    uncompressedData = lzma.LZMADecompressor().decompress(compressedData)

    self.assertEqual(originalData, uncompressedData)
def compress_lzma(chunks, lvl):
    """Yield the non-empty pieces of one LZMA stream built from `chunks`.

    Every item of `chunks` is fed, in order, to a single compressor created
    with ``preset=lvl``; whatever the compressor returns is yielded unless
    it is empty, and the flushed tail is yielded last (again only when
    non-empty).

    Example::

        chunks = list(zlog.compress_lzma(texts, lvl=9))
    """
    encoder = lzma.LZMACompressor(preset=lvl)
    for piece in map(encoder.compress, chunks):
        if piece:
            yield piece
    tail = encoder.flush()
    if tail:
        yield tail
def open_write(self, key, metadata=None, is_compressed=False):
    """Open `key` for writing on the wrapped backend and return the handle.

    The raw metadata passed to the backend records the format version, the
    compression label and the encryption label.  With a passphrase set, the
    frozen metadata is encrypted and signed and the returned handle is
    wrapped in an ``EncryptFilter``; when a compressor was selected the
    handle is additionally wrapped in a ``CompressFilter``.

    :param key: object key to write.
    :param metadata: dict of metadata, or None for an empty dict.
    :param is_compressed: when true, no compressor is applied and the
        compression label is 'None'.
    :raises TypeError: if `metadata` is neither a dict nor None.
    """
    if metadata is None:
        metadata = dict()
    elif not isinstance(metadata, dict):
        raise TypeError('*metadata*: expected dict or None, got %s' % type(metadata))

    frozen_meta = freeze_basic_mapping(metadata)
    meta_raw = {'format_version': 2}

    # Pick the compressor and record its label in the raw metadata.
    if is_compressed or self.compression[0] is None:
        compr = None
        meta_raw['compression'] = 'None'
    elif self.compression[0] == 'zlib':
        compr = zlib.compressobj(self.compression[1])
        meta_raw['compression'] = 'ZLIB'
    elif self.compression[0] == 'bzip2':
        compr = bz2.BZ2Compressor(self.compression[1])
        meta_raw['compression'] = 'BZIP2'
    elif self.compression[0] == 'lzma':
        compr = lzma.LZMACompressor(preset=self.compression[1])
        meta_raw['compression'] = 'LZMA'

    if self.passphrase is None:
        meta_raw['encryption'] = 'None'
        meta_raw['data'] = frozen_meta
    else:
        # The nonce combines the current time with the object key; separate
        # keys are derived from it for the metadata and for the payload.
        nonce = struct.pack('<f', time.time()) + key.encode('utf-8')
        meta_key = sha256(self.passphrase + nonce + b'meta')
        data_key = sha256(self.passphrase + nonce)
        meta_raw['encryption'] = 'AES_v2'
        meta_raw['nonce'] = nonce
        meta_raw['data'] = aes_cipher(meta_key).encrypt(frozen_meta)
        meta_raw['object_id'] = key
        meta_raw['signature'] = checksum_basic_mapping(meta_raw, meta_key)

    fh = self.backend.open_write(key, meta_raw)
    if self.passphrase is not None:
        fh = EncryptFilter(fh, data_key)
    if compr:
        fh = CompressFilter(fh, compr)
    return fh