def _get_block_data(self, block):
    if block in self._blocks_cache_queue:
        return self._blocks_cache[block]
    if not (block < len(self._index)):
        raise ZeexOutOfBoundExceptions("Requested block not in file", block)
    sys.stderr.write('Block: {}'.format(block))
    offset = self._index[block] + self.headersize
    if (block + 1) < len(self._index):
        csize = self._index[block + 1] + self.headersize - offset
    else:
        csize = self.header.cdata_length + self.headersize - offset
    self._infile.seek(offset)
    compressed = self._infile.read(csize)
    data = lzma.decompress(compressed)
    if len(self._blocks_cache_queue) > self._max_cached_blocks:
        del self._blocks_cache[self._blocks_cache_queue[0]]
        del self._blocks_cache_queue[0]
    self._blocks_cache_queue.append(block)
    self._blocks_cache[block] = data
    return data
def xz_decompress(data):
    '''decompress xz `data` using backports.lzma, or if that's not
    available then the commandline `xz --decompress` tool
    '''
    if xz is not None:
        try:
            bigdata = xz.decompress(data)
            data = bigdata
        except:
            logger.error('decompress of %s bytes failed', len(data))
            raise
    else:
        ## launch xz child
        xz_child = subprocess.Popen(['xz', '--decompress'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        ## use communicate to pass the data incrementally to the child
        ## while reading the output, to avoid blocking
        data, errors = xz_child.communicate(data)
        assert not errors, errors
    return data
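# Illustrative round-trip for xz_decompress, not part of the original module.
# It assumes the module-level `xz` referenced above is bound to backports.lzma
# (or the stdlib lzma module); when `xz` is None, the subprocess fallback path
# handles the same payload via the `xz --decompress` command-line tool.
if xz is not None:
    payload = xz.compress(b'hello world')
    assert xz_decompress(payload) == b'hello world'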
def open_db(self):
    self.terms_ldb = leveldb.LevelDB(self.terms_fl)
    self.docs_ldb = leveldb.LevelDB(self.docs_fl)
    self.doc_buffer_size = 0
    self.term_buffer_size = 0
    #self.doc_flush_buffer = np.empty(self.max_doc_flush_buffer_size, dtype="S%d" % self.max_doc_size)
    self.doc_flush_buffer = [None] * self.max_doc_flush_buffer_size
    self.term_flush_buffer = np.empty(self.max_term_flush_buffer_size,
                                      dtype="S%d" % self.max_term_size)
    self.doc_id_flush_buffer = np.empty(self.max_doc_flush_buffer_size,
                                        dtype=np.int64)
    self.term_id_flush_buffer = np.empty(self.max_term_flush_buffer_size,
                                         dtype=np.int64)
    if self.compression == COMPRESSION.NONE:
        self.compress = lambda string: string
        self.decompress = lambda string: string
    elif self.compression == COMPRESSION.ZLIB:
        import zlib
        self.compress = lambda string: zlib.compress(string, self.compression_level)
        self.decompress = lambda string: zlib.decompress(string)
    elif self.compression == COMPRESSION.LZMA:
        import backports.lzma as lzma
        self.compress = lambda string: lzma.compress(bytearray(string), format=lzma.FORMAT_RAW)
        self.decompress = lambda data: lzma.decompress(data, format=lzma.FORMAT_RAW)
    elif self.compression == COMPRESSION.LZ4R:
        import lz4
        self.compress = lambda string: lz4.compress(string)
        self.decompress = lambda string: lz4.decompress(string)
    elif self.compression == COMPRESSION.LZ4H:
        import lz4
        self.compress = lambda string: lz4.compressHC(string)
        self.decompress = lambda string: lz4.decompress(string)
    else:
        raise Exception("Wrong compression type %r" % self.compression)
def init_index(self):
    if self.is_multi_deb:
        self.index_path = '/'.join([self.components[0]])
        url = '/'.join([self.uri, self.components[0], 'Packages.xz'])
    else:
        self.index_path = '/'.join([
            'dists', self.distribution, self.components[0],
            'binary-' + self.arch
        ])
        url = '/'.join([
            self.uri, 'dists', self.distribution, self.components[0],
            'binary-' + self.arch, 'Packages.xz'
        ])
    content = httpClient.get(url)
    index_data = lzma.decompress(content)
    self.index_list = self.parse_package_file(index_data)
    self.index = {}
    for entry in self.index_list:
        if self.index.get(entry.get('Package')) is None:
            self.index[entry.get('Package')] = []
        self.index[entry.get('Package')].append(entry)
    # sort entries sharing a package name by version, latest -> oldest
    for key in self.index:
        self.index[key] = sorted(
            self.index[key],
            key=lambda e: version.parse(e.get('Version', '0.0')),
            reverse=True)
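# Hypothetical lookup against the index built above (the `repo` name and the
# 'bash' package are placeholders, not from the original code): entries for a
# package are sorted newest-first, so index 0 is the latest available version.
#
#     repo.init_index()
#     latest = repo.index.get('bash', [{}])[0]
#     print(latest.get('Version'), latest.get('Filename'))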
def import_base_image(self, repository, tagprefix, url, digest):
    """Securely import a Docker base image.

    `docker pull` is not secure because it doesn't verify digests before
    processing data: it "tees" the image content to the image processing
    layer and the hasher, and only checks that the digest matches the
    expected value after all image processing has occurred. While fast
    (images don't need to be buffered before being applied), this is
    insecure because a malicious image could exploit a bug in image
    processing and take control of the Docker daemon and your machine.

    This function takes a repository name, tag prefix, URL, and a SHA-256
    hex digest as arguments and returns the Docker image ID for the image.
    The contents of the image are, of course, verified to match the digest
    before being applied.

    The imported image is "tagged" in the specified repository. The tag of
    the created image is the specified prefix plus the SHA-256 of the URL
    and digest combined. This serves as a deterministic cache key, so
    subsequent requests for the same (url, digest) can be answered nearly
    instantly. Of course, this assumes:

    a) the Docker daemon and its stored images can be trusted
    b) the content behind a URL is constant
    """
    tag = '%s-%s' % (tagprefix,
                     hashlib.sha256('%s%s' % (url, digest)).hexdigest())

    for image in self._get_sorted_images():
        for repotag in image['RepoTags']:
            r, t = repotag.split(':')
            if r == repository and t == tag:
                return image['Id']

    # We didn't get a cache hit. Download the URL.
    with tempfile.NamedTemporaryFile() as fh:
        digester = hashlib.sha256()
        res = requests.get(url, stream=True)
        for chunk in res.iter_content(8192):
            fh.write(chunk)
            digester.update(chunk)

        # Verify content before doing anything with it.
        # (This is the part Docker gets wrong.)
        if digester.hexdigest() != digest:
            raise Exception('downloaded Docker image does not match '
                            'digest: %s; got %s expected %s' %
                            (url, digester.hexdigest(), digest))

        fh.flush()
        fh.seek(0)

        # Docker 1.10 no longer appears to allow import of .xz files
        # directly. Do the decompress locally.
        if url.endswith('.xz'):
            fh = lzma.decompress(fh.read())

        res = self.api_client.import_image_from_data(
            fh, repository=repository, tag=tag)

        # docker-py doesn't parse the JSON response in what is almost
        # certainly a bug. Do it ourselves.
        return json.loads(res.strip())['status']
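# A minimal sketch of the deterministic cache key described in the docstring
# above: the tag is the prefix plus the SHA-256 of the URL concatenated with
# the digest, so identical (url, digest) requests resolve to the same tag.
# The helper name, URL, and digest below are hypothetical placeholders.
def _example_cache_tag(tagprefix='base',
                       url='https://example.com/image.tar.xz',
                       digest='0' * 64):
    import hashlib
    combined = '%s%s' % (url, digest)
    return '%s-%s' % (tagprefix, hashlib.sha256(combined.encode()).hexdigest())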
def _read_v2(filename):
    try:
        key = _bucket_v2.get_key(filename)
        compressed = key.get_contents_as_string()
        raw = lzma.decompress(compressed).split("\n")[:-1]
        return map(lambda x: x.split("\t", 1)[1], raw)
    except ssl.SSLError:
        return []
def loads(astring):
    """Decompress and deserialize string into a Python object via pickle."""
    try:
        return pickle.loads(lzma.decompress(astring))
    except lzma.LZMAError as e:
        raise SerializerError('Cannot decompress object ("{}")'.format(str(e)))
    except pickle.UnpicklingError as e:
        raise SerializerError('Cannot restore object ("{}")'.format(str(e)))
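# The matching serializer is presumably the inverse, pickle-then-compress; a
# minimal sketch under that assumption (not taken from the original module):
def dumps(obj):
    """Serialize a Python object via pickle and compress it with LZMA."""
    return lzma.compress(pickle.dumps(obj))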
def main():
    import sys
    import os
    import tarfile
    if sys.version_info < (3, ):
        from backports import lzma
        import urllib2
    else:
        import lzma
        from urllib.request import FancyURLopener

        class MyURLOpener(FancyURLopener):
            version = 'Mozilla/5.0'

    try:
        nim_version_string = sys.argv[1]
    except IndexError:
        nim_version = (1, 2, 6)
        nim_version_string = '.'.join([str(x) for x in nim_version])
    nim_download = 'http://nim-lang.org/download/nim-{}.tar.xz'.format(
        nim_version_string)
    print('getting', nim_download)
    inst_dir = os.path.dirname(os.path.dirname(sys.executable))
    print('inst_dir', inst_dir)
    os.chdir(inst_dir)
    if True:
        from io import BytesIO
        if sys.version_info < (3, ):
            # request = urllib2.Request(nim_download)
            # request.add_header('User-Agent', "Mozilla/5.0")
            opener = urllib2.build_opener()
            opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
        else:
            opener = MyURLOpener()
        response = opener.open(nim_download)
        data = BytesIO()
        data.write(lzma.decompress(response.read()))
        data.seek(0)
        with tarfile.open(fileobj=data, mode='r') as tar:
            for tarinfo in tar:
                if '/' not in tarinfo.name:
                    continue
                name = tarinfo.name.split('/', 1)[1]
                if tarinfo.isdir():
                    if not os.path.exists(name):
                        os.mkdir(name)
                    continue
                # print('tarinfo', tarinfo.name, name, tarinfo.isdir())
                with open(name, 'wb') as fp:
                    fp.write(tar.extractfile(tarinfo).read())
    # os.system('make -j8')
    os.system('sh build.sh')
    os.system('./bin/nim c koch')
    os.system('./koch tools')
def run(self):
    while True:
        key = self.inqueue.get()
        k = Key(self.bucket)
        k.key = key
        keyContents = k.get_contents_as_string()
        lz = lzma.decompress(keyContents, format=lzma.FORMAT_ALONE)
        for line in lz.split('\n'):
            if line != '':
                self.outqueue.put(line)
        self.inqueue.task_done()
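# Note on the format argument: FORMAT_ALONE is the legacy .lzma container
# rather than the newer .xz container, so the S3 objects read here are
# presumably raw .lzma streams. A minimal round-trip under that assumption:
#
#     blob = lzma.compress(b'line1\nline2\n', format=lzma.FORMAT_ALONE)
#     assert lzma.decompress(blob, format=lzma.FORMAT_ALONE) == b'line1\nline2\n'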
def run(self):
    while True:
        key = self.inqueue.get()
        k = Key(self.bucket)
        k.key = key
        keyContents = k.get_contents_as_string()
        lz = lzma.decompress(keyContents, format=lzma.FORMAT_ALONE)
        for line in lz.split('\n'):
            if line != '':
                j = json.loads(line[37:])
                if self.measure in j['histograms'].keys():
                    histo = json.dumps(j['histograms'][self.measure])
                    version = j['info']['appUpdateChannel']
                    branch = j['info']['appVersion']
                    self.outqueue.put(histo + ',' + version + ',' + branch)
        self.inqueue.task_done()
def decompress(data, *args, **kwargs):
    return lzma.decompress(data, *args, **kwargs)
def get(self, n=1):
    res = self.call('post', '/q/%s/%d' % (self.queue, n))
    return cPickle.loads(lzma.decompress(res.content))
def decrypt_and_uncompress(data, gpg_private=None, tmp_dir='/tmp'):
    '''
    Given a data buffer of bytes, decrypt it using gnupg if `gpg_private`
    is provided, and uncompress it using xz.

    :returns: a tuple of (logs, data), where `logs` is an array of strings
    and data is a binary string
    '''
    _errors = []
    tmp_path = os.path.join(tmp_dir,
                            'tmp-compress-and-encrypt-path-' + uuid.uuid4().hex)
    if not os.path.exists(tmp_path):
        os.makedirs(tmp_path)

    if gpg_private is not None:
        ### setup gpg for decryption
        gpg_dir = os.path.join(tmp_path, 'gpg_dir')
        os.makedirs(gpg_dir)
        gpg_child = subprocess.Popen(
            ['gpg', '--no-permission-warning', '--homedir', gpg_dir,
             '--import', gpg_private],
            stderr=subprocess.PIPE)
        s_out, errors = gpg_child.communicate()
        if errors:
            _errors.append('gpg logs to stderr, read carefully:\n\n%s' % errors)

        ## decrypt it, and free memory
        ## encrypt using the fingerprint for our trec-kba-rsa key pair
        gpg_child = subprocess.Popen(
            ## setup gpg to decrypt with trec-kba private key
            ## (i.e. make it the recipient), with zero compression,
            ## ascii armoring is off by default, and --output - must
            ## appear before --decrypt -
            ['gpg', '--no-permission-warning', '--homedir', gpg_dir,
             '--trust-model', 'always', '--output', '-', '--decrypt', '-'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)

        ## communicate with child via its stdin
        data, errors = gpg_child.communicate(data)
        if errors:
            _errors.append(errors)

        ## remove the gpg_dir
        shutil.rmtree(gpg_dir, ignore_errors=True)

    if lzma is not None:
        data = lzma.decompress(data)
    else:
        ## launch xz child
        xz_child = subprocess.Popen(
            ['xz', '--decompress'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)

        ## use communicate to pass the data incrementally to the child
        ## while reading the output, to avoid blocking
        data, errors = xz_child.communicate(data)
        assert not errors, errors

    return _errors, data
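# Hypothetical usage of decrypt_and_uncompress (the file and key paths are
# placeholders): the input is expected to be xz-compressed, and additionally
# GPG-encrypted when `gpg_private` is given.
#
#     with open('/path/to/payload.xz.gpg', 'rb') as f:
#         errors, plaintext = decrypt_and_uncompress(
#             f.read(), gpg_private='/path/to/private.key')
#     for msg in errors:
#         print(msg)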
def _decode_lzma(self, string_data, shape, dtype):
    arr = lzma.decompress(string_data)
    arr = np.fromstring(arr, dtype=dtype)
    return arr.reshape(shape[::-1]).T
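# Round-trip sketch for the layout _decode_lzma assumes: the compressed buffer
# holds the array in Fortran (column-major) order, hence the reversed shape and
# the final transpose. Illustrative only; np.frombuffer stands in for the
# deprecated np.fromstring used above.
#
#     import lzma
#     import numpy as np
#     original = np.arange(6, dtype=np.uint8).reshape(2, 3)
#     blob = lzma.compress(original.tobytes(order='F'))
#     decoded = np.frombuffer(lzma.decompress(blob), dtype=np.uint8)
#     assert (decoded.reshape((2, 3)[::-1]).T == original).all()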
parser = argparse.ArgumentParser(
    __doc__, usage='python compare-compression.py PATH-TO-.fc.xz')
parser.add_argument(
    'path',
    help='path to an XZ-compressed file to read for compression tests')
args = parser.parse_args()

## could check something about the FCs
#from dossier.fc.feature_collection import FeatureCollectionChunk as FCChunk
#fcc = FCChunk(path)
#for fc in fcc:
#    print fc['feature_name']

## assume the incoming data is some long-term archival stuff in XZ
## compressed format, and you want to see how slow XZ is:
xz_data = open(args.path).read()

start = time.time()
data = xz.decompress(xz_data)
decompression_time = time.time() - start

start = time.time()
xz_data2 = xz.compress(data)
assert xz_data2 == xz_data
compression_time = time.time() - start

def report(rec):
    rec['MB'] = rec['bytes'] / 2**20
    rec['ratio'] = rec['bytes'] / len(data)
    ctime = rec.get('compression_time')
    rec['compression_rate'] = ctime and rec['MB'] / ctime or float('inf')
    dtime = rec.get('decompression_time')
    rec['decompression_rate'] = dtime and rec['MB'] / dtime or 0
    print('%(name)s:\t%(MB)d MB of FC data, %(ratio).3f compression, %(compression_time).3f seconds --> %(compression_rate).3f MB/sec compression, '