def test_lz4_compress_big_chunk(self):
    try:
        import lz4
    except ImportError:
        raise unittest.SkipTest("lz4 not installed, skip the test")
    yield self.insertTestData(self.backgroundData + self.testLogLines)
    line = u'xy' * 10000
    self.db.master.config.logCompressionMethod = "lz4"
    self.assertEqual((yield self.db.logs.appendLog(201, line + '\n')), (7, 7))

    def thd(conn):
        res = conn.execute(
            self.db.model.logchunks.select(
                whereclause=self.db.model.logchunks.c.first_line > 6))
        row = res.fetchone()
        res.close()
        return dict(row)
    newRow = yield self.db.pool.do(thd)
    self.assertEqual(newRow, {
        'logid': 201,
        'first_line': 7,
        'last_line': 7,
        'content': lz4.dumps(line),
        'compressed': 3})
def Compress(sou, des):
    try:
        with open(des, 'wb') as out:
            with open(sou, 'rb') as inFile:
                out.write(lz4.dumps(inFile.read()))
                out.flush()
    except IOError:
        print('File not found')
def inner(*args, **kwargs):
    mode = wrapper.mode
    name = wrapper.name
    dir = wrapper.dir
    compress = wrapper.compress
    verbose = wrapper.verbose
    if mode in [False, 'skip']:
        return func(*args, **kwargs)
    if name is None:
        name = func.__name__
    if not os.path.exists(dir):
        os.makedirs(dir)
    cachePath = os.path.join(dir, name)
    if compress:
        import lz4
    if os.path.exists(cachePath) and mode != 'update':
        if compress == 'lz4':
            cached = StringIO(lz4.loads(open(cachePath, 'r').read()))
        else:
            cached = cachePath  # cached = open(cachePath, 'r')
        if verbose:
            print('\t!! Cached from %s' % cachePath)
        aOut = load(cached)
        if aOut.shape != () or purge_empty_file == False:
            return aOut
        else:
            os.remove(cachePath)
            raise ValueError('empty cache file (erased): %s' % (cachePath))
    if os.path.exists(cachePath) == False or mode == 'update':
        aOut = func(*args, **kwargs)
        if compress == 'lz4':
            cached = StringIO()
            save(cached, aOut)
            open(cachePath, 'w').write(lz4.dumps(cached.getvalue()))
        else:
            fCache = open(cachePath, 'wb')
            save(fCache, aOut)
            fCache.close()
        if verbose:
            print('\t!! Cached to %s' % cachePath)
        return aOut
    raise KeyError('failed exception handling for %s and %s' % (cachePath, mode))
def save(self, dict_res={}):
    pipeline = redis_one.pipeline()
    for u, v in dict_res.iteritems():
        title, keyword, description = self.get_keyword(v)
        content = self.get_content(v)
        id = self.get_id(u)
        dict_info = {'url': u, 'title': title, 'keyword': keyword,
                     'description': description, 'content': content, 'id': id}
        # logger.error('|'.join([dict_info[t] for t in ['url', 'title', 'keyword', 'description']]))
        pipeline.hset(self.prefix, u, lz4.dumps(cPickle.dumps(dict_info)))
    pipeline.execute()
def serialize_subarray(cls, subarray):
    if not subarray.flags['C_CONTIGUOUS']:
        subarray = subarray.copy(order='C')

    # Buffers larger than 1 GB would overflow
    # We could fix this by slicing each slice into smaller pieces...
    assert subarray.nbytes <= cls.MAX_LZ4_BUFFER_SIZE, \
        "FIXME: This class doesn't support arrays whose slices are each > 1 GB"

    return lz4.dumps(np.getbuffer(subarray))
def inner(*args, **kwargs):
    mode = wrapper.mode
    name = wrapper.name
    dir = wrapper.dir
    compress = wrapper.compress
    verbose = wrapper.verbose
    if mode in [False, 'skip']:
        return func(*args, **kwargs)
    if name is None:
        name = func.__name__
    if not os.path.exists(dir):
        os.makedirs(dir)
    cachePath = os.path.join(dir, name)
    if compress:
        import lz4
    if os.path.exists(cachePath) and mode != 'update':
        if compress == 'lz4':
            cached = StringIO(lz4.loads(open(cachePath, 'r').read()))
        else:
            cached = cachePath  # cached = open(cachePath, 'r')
        if verbose:
            print '\t!! Cached from %s' % cachePath
        aOut = load(cached)
        if aOut.shape != () or purge_empty_file == False:
            return aOut
        else:
            os.remove(cachePath)
            raise ValueError('empty cache file (erased): %s' % (cachePath))
    if os.path.exists(cachePath) == False or mode == 'update':
        aOut = func(*args, **kwargs)
        if compress == 'lz4':
            cached = StringIO()
            save(cached, aOut)
            open(cachePath, 'w').write(lz4.dumps(cached.getvalue()))
        else:
            fCache = open(cachePath, 'wb')
            save(fCache, aOut)
            fCache.close()
        if verbose:
            print '\t!! Cached to %s' % cachePath
        return aOut
    raise KeyError('failed exception handling for %s and %s' % (cachePath, mode))
def CompressFile(self, Name, OutName, f):
    try:
        f2 = open(Name, 'rb')
    except:
        return
    if os.stat(Name).st_size < 1073741824:
        data = f2.read()
        try:
            f.writestr(OutName, lz4.dumps(data))
        except:
            f.write(lz4.dumps(data))
    else:
        OutName_tmp = OutName.replace(os.path.basename(OutName), "")
        OutName = OutName_tmp + "@@" + os.path.basename(OutName) + "/"
        del OutName_tmp
        xpart = 0
        for piece in lz4o().read_in_chunks(f2, 104857600):
            f.writestr(OutName + str(xpart), lz4.dumps(piece))
            xpart += 1
    f2.close()
def __new__(cls, *args, **kwargs):
    response = {
        'datas': cls._format_datas(kwargs['datas']),
    }
    activity_logger.debug('<Response ' + str(response['datas']) + '>')
    msg = msgpack.packb(response)
    if kwargs.pop('compression', False) is True:
        msg = lz4.dumps(msg)
    return msg
def encrypt(self, obj):
    if not j.basetype.string.check(obj):
        if j.basetype.dictionary.check(obj):
            val = obj
        else:
            val = obj.__dict__
        val = ujson.dumps(val)
    else:
        val = obj
    val = lz4.dumps(val)
    val = j.db.serializers.blowfish.dumps(val, self.key)
    return val
def wrapper(*args, **kwargs):
    if 'CACHED' in kwargs:
        Option = kwargs.pop('CACHED')
        for k, v in Option.items():
            self.__dict__[k] = v
    for k, v in kwargs.items():
        if k.startswith('CACHED_'):
            self.__dict__[k.split('_')[1].lower()] = v
    if self.mode in [False, 'skip']:
        return func(*args, **kwargs)
    if type(self.name) != str:
        name = self.name(func.__name__, args)
    else:
        name = self.name
    if not os.path.exists(self.dir):
        os.makedirs(self.dir)
    cachePath = os.path.join(self.dir, name)
    if self.compress not in [False, None]:
        cachePath = cachePath + '.%s' % self.compress
    if os.path.exists(cachePath) and self.mode != 'update':
        if self.compress != 'lz4':
            cached = open(cachePath, 'r')
        else:
            cached = StringIO(lz4.loads(open(cachePath, 'r').read()))
        if self.verbose:
            print '\t!! Cached from %s' % cachePath
        return load(cached)
    else:
        aOut = func(*args, **kwargs)
        if self.compress == 'lz4':
            cached = StringIO()
            save(cached, aOut)
            open(cachePath, 'w').write(lz4.dumps(cached.getvalue()))
        else:
            save(open(cachePath, 'w'), aOut)
        if self.verbose:
            print '\t!! Cached to %s' % cachePath
        return aOut
def __init__(self, data, format, size, compressed=True):
    self._compressed = compressed
    if self._compressed:
        self._data = lz4.dumps(data)
    else:
        self._data = data
    self._format = format
    self._size = size
def encrypt(self, obj):
    if isinstance(obj, unicode):
        obj = str(obj)
    if not j.basetype.string.check(obj):
        if j.basetype.dictionary.check(obj):
            val = obj
        else:
            val = obj.__dict__
        val = ujson.dumps(val)
    else:
        val = obj
    val = lz4.dumps(val)
    val = j.db.serializers.blowfish.dumps(val, self.key)
    return val
def __init__(self, numpy_array):
    """Serializes and compresses the numpy array with LZ4"""
    # write numpy to memory using StringIO
    memfile = StringIO.StringIO()
    numpy.save(memfile, numpy_array)
    memfile.seek(0)
    numpy_array_binary = memfile.read()
    memfile.close()
    self.serialized_data = []
    # write in chunks of 1 billion bytes to prevent overflow
    for index in range(0, len(numpy_array_binary), self.lz4_chunk):
        self.serialized_data.append(
            lz4.dumps(numpy_array_binary[index:index + self.lz4_chunk]))
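The class above stores only the compressed chunks; the matching decode path is not part of the excerpt. A minimal sketch of what it could look like, assuming the same serialized_data attribute and the legacy lz4.loads API used throughout these examples (the method name is hypothetical):

def deserialize(self):
    """Hypothetical counterpart: decompress each LZ4 chunk and rebuild the numpy array."""
    # Chunks were compressed independently, so decompress them in order and join.
    numpy_array_binary = b''.join(lz4.loads(chunk) for chunk in self.serialized_data)
    memfile = StringIO.StringIO(numpy_array_binary)
    memfile.seek(0)
    return numpy.load(memfile)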
def _compress_depth_frame(depth_frame):
    try:
        d = np.frombuffer(depth_frame.data, dtype=np.uint16).reshape(
            depth_frame.shape[::-1], order='C')
        high_bits = ((d >> 4) & 0xff).astype(np.uint8)
        low_bits = (d & 0xf).astype(np.uint8)
        packed_low_bits = (low_bits[:, 0::2] << 4) | low_bits[:, 1::2]
        bio = BytesIO()
        bio.write(np.asarray(high_bits, order='C').data)
        bio.write(np.asarray(packed_low_bits, order='C').data)
        return lz4.dumps(bio.getvalue())
    except Exception as e:
        print('Error: {0}'.format(e))
        return None
def decorator(*args, **kwargs):
    if CACHE is None:
        initialize_cache()
    kwargs_tuple = tuple(sorted(kwargs.items()))
    if not isinstance(args, collections.Hashable) \
            or not isinstance(kwargs_tuple, collections.Hashable):
        msg = "Function arguments not hashable:"
        msg += "\n\targs:%s\n\tkwargs:%s"
        raise Exception(msg % (args, kwargs))
    key = "%s.%s[%s]" % (fn.__module__, fn.__name__, hash((args, kwargs_tuple)))
    if not CACHE.exists(key):
        value = fn(*args, **kwargs)
        pickled_value = lz4.dumps(pickle.dumps(value))
        CACHE.setex(key, DEFAULT_EXPIRY, pickled_value)
        return value
    else:
        pickled_value = CACHE.get(key)
        return pickle.loads(lz4.loads(pickled_value))
def get_dataframe(self, md5, compress='lz4'):
    """Return a dataframe from the DataStore.

    This is just a convenience method that uses get_sample internally.

    Args:
        md5: the md5 of the dataframe
        compress: compression to use: (defaults to 'lz4' but can be set to None)

    Returns:
        A msgpack'd Pandas DataFrame

    Raises:
        Workbench.DataNotFound if the dataframe is not found.
    """
    # First we try a sample, if we can't find one we try getting a sample_set.
    sample = self.data_store.get_sample(md5)
    if not sample:
        raise WorkBench.DataNotFound("Could not find %s in the data store", md5)
    if not compress:
        return sample['raw_bytes']
    else:
        compress_df = lz4.dumps(sample['raw_bytes'])
        print 'Info: DataFrame compression %.0f%%' % (
            len(compress_df) * 100.0 / float(len(sample['raw_bytes'])))
        return compress_df
import lz4
import sys

DATA = open("/dev/urandom", "rb").read(128 * 1024)  # Read 128kb
sys.exit(DATA != lz4.loads(lz4.dumps(DATA)) and 1 or 0)
#!/usr/bin/env python
#
# Copyright (c) 2014, Jan Varho <*****@*****.**>
# Some rights reserved, see COPYING

import argparse
import lz4
import sys

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='LZ4 compress')
    parser.add_argument('-o', '--output')
    parser.add_argument('FILE')
    args = parser.parse_args()

    with open(args.FILE) as f:
        s = f.read()
    c = lz4.dumps(s)

    if args.output:
        with open(args.output, 'wb') as f:
            f.write(c)
    else:
        sys.stdout.write(c)
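The script above only compresses; a decompression command is not part of the excerpt. A minimal sketch along the same lines, assuming the same legacy lz4.loads API (this companion script is hypothetical, not from the original source):

#!/usr/bin/env python
# Hypothetical companion script: LZ4 decompress, mirroring the compress script above

import argparse
import lz4
import sys

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='LZ4 decompress')
    parser.add_argument('-o', '--output')
    parser.add_argument('FILE')
    args = parser.parse_args()

    with open(args.FILE, 'rb') as f:
        c = f.read()
    s = lz4.loads(c)  # inverse of lz4.dumps

    if args.output:
        with open(args.output, 'wb') as f:
            f.write(s)
    else:
        sys.stdout.write(s)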
def test_random(self):
    DATA = os.urandom(128 * 1024)  # Read 128kb
    self.assertEqual(DATA, lz4.loads(lz4.dumps(DATA)))
def DumpToNetworkString(self):
    obj_string = self.DumpToString()
    return lz4.dumps(obj_string)
def serialize(self):
    return lz4.dumps(msgpack.dumps(self.to_dict()))
def roundtrip(x):
    return lz4.loads(lz4.dumps(x))
def _set(self, key, values):
    # assert len(values) == len(self._meta["columns"])
    assert key != "_meta"
    data = np.array(values, dtype=np.float64)
    self._db.put(key, lz4.dumps(data.tostring()))
def Lz4Blob(name):
    return ExprAdapter(
        Field("data", lambda ctx: ctx.header.message_length),
        encoder=lambda obj, ctx: lz4.dumps(obj),
        decoder=lambda obj, ctx: lz4.loads(obj),
    )
import lz4

data = 100 * b"Hello World!"
compressed_data = lz4.dumps(data)
print(len(data), len(compressed_data))
assert data == lz4.loads(compressed_data)
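All of these snippets use lz4.dumps/lz4.loads, the module-level API of older python-lz4 releases; current releases no longer expose those names. A sketch of the same round trip on a modern python-lz4, assuming the lz4.block API is available:

import lz4.block

data = 100 * b"Hello World!"
# lz4.block.compress stores the uncompressed size in a prefix by default,
# much like the old lz4.dumps, so decompress can size its output buffer.
compressed_data = lz4.block.compress(data)
print(len(data), len(compressed_data))
assert data == lz4.block.decompress(compressed_data)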
def dumps(self, obj):
    return lz4.dumps(self.serializer.dumps(obj))
def dumps_lz4(data):
    import lz4
    return lz4.dumps(data)
def to_LZJSON(self):
    return lz4.dumps(self.to_JSON())
def test_string(self):
    DATA = "test" * (5 * 1024 * 1024)  # 5mb of string
    self.assertEqual(DATA, lz4.loads(lz4.dumps(DATA)))
def __init__(self, data, format, size):
    self._data = lz4.dumps(data)
    self._format = format
    self._size = size
def lz4_energy(arr):
    return len(lz4.dumps(arr.tobytes()))
def pack(self, lz4_=False):
    ret = self.to_json().encode('utf-8')
    if lz4_:
        return lz4.dumps(ret)
    return ret
import lz4
import sys

DATA = open("/dev/urandom", "rb").read(128 * 1024)  # Read 128kb
if DATA != lz4.loads(lz4.dumps(DATA)):
    sys.exit(1)

# max size
DATA = "x" * 2139095020
if DATA != lz4.loads(lz4.dumps(DATA)):
    sys.exit(1)

# max size + 1
DATA = DATA + "x"
try:
    lz4.dumps(DATA)
    sys.exit(1)
except ValueError:
    pass

sys.exit(0)
def lz_compresss(data):
    # lz4.dumps prepends a 4-byte uncompressed-size header; slice it off to keep only the raw block.
    return lz4.dumps(data)[4:]
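Dropping that prefix means lz4.loads can no longer decode the block on its own; the original length has to be carried separately and stitched back on. A minimal sketch, assuming the legacy format's little-endian uint32 prefix (lz_decompress is a hypothetical helper, not part of the source):

import struct
import lz4

def lz_decompress(block, original_size):
    # Restore the little-endian uint32 size prefix stripped by lz_compresss,
    # then let the legacy lz4.loads decode the block.
    return lz4.loads(struct.pack('<I', original_size) + block)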
def test_random_ns_4096kb(self):
    DATA = os.urandom(4096 * 1024)  # Read 4096kb
    self.assertEqual(DATA, lz4.loads(lz4.dumps(DATA, head_type=0), head_type=0))
def test_random_le32(self):
    DATA = os.urandom(128 * 1024)  # Read 128kb
    self.assertEqual(DATA, lz4.loads(lz4.dumps(DATA, head_type=1), head_type=1))