def fileNCD(page1, page2):
    '''
    Calculate normalized compression distance between two pages
    http://www.complearn.org/ncd.html
    @param page1: {String} the first page's file path
    @param page2: {String} the second page's file path
    @return: {Float} NCD value
    '''
    data1, data2 = open(page1, 'rb').read(), open(page2, 'rb').read()
    len1, len2 = len(pylzma.compress(data1)), len(pylzma.compress(data2))
    return 1.0 * (len(pylzma.compress(data1 + data2)) - min(len1, len2)) / max(len1, len2)
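# A minimal usage sketch for fileNCD (the paths below are hypothetical):
# NCD near 0.0 means near-identical pages, near 1.0 means unrelated pages.
import pylzma

d = fileNCD('pages/page_a.html', 'pages/page_b.html')
print('NCD = %.3f' % d)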
def _compression_dist(x, y, l_x=None, l_y=None):
    if x == y:
        return 0
    x_b = x  # .encode('utf-8')
    y_b = y  # .encode('utf-8')
    if l_x is None:
        l_x = len(lzma.compress(x_b))
        l_y = len(lzma.compress(y_b))
    l_xy = len(lzma.compress(x_b + y_b))
    l_yx = len(lzma.compress(y_b + x_b))
    dist = np_utils._try_divide(min(l_xy, l_yx) - min(l_x, l_y), max(l_x, l_y))
    return dist
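# np_utils._try_divide is an external helper not shown here; it presumably
# guards against division by zero. A minimal stand-in under that assumption:
def _try_divide(x, y, val=0.0):
    # return x / y, falling back to val when y is zero (assumed behavior)
    if y != 0.0:
        val = float(x) / y
    return val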
def test_compression_decompression_noeos(self):
    # call compression and decompression on random data of various sizes
    for i in range(18):
        size = 1 << i
        original = generate_random(size)
        result = pylzma.decompress(pylzma.compress(original, eos=0), maxlength=size)
        self.assertEqual(md5(original).hexdigest(), md5(result).hexdigest())
def compress(buf, size):
    remain = size
    pos = 0
    comp_size = 0
    num_blocks = 0
    data = ""
    while remain > 0:
        num_blocks += 1
        if remain > UNCOMP_BLOCK_SIZE:
            #print 'compress0 %d' % UNCOMP_BLOCK_SIZE
            head = struct.pack("I", UNCOMP_BLOCK_SIZE)
            block = buf[pos:pos+UNCOMP_BLOCK_SIZE]
            pos += UNCOMP_BLOCK_SIZE
            remain -= UNCOMP_BLOCK_SIZE
        else:
            #print 'compress1 %d' % remain
            head = struct.pack("I", remain)
            block = buf[pos:pos+remain]
            pos += remain
            remain -= remain
        dst = pylzma.compress(block, dictionary=23, fastBytes=273, eos=0)[5:]
        head += struct.pack("I8s", len(dst), "\x5d\x00\x40\x00\x00\x00\x00\x00")
        #out.write(head+dst)
        data += head + dst
        pad = len(head + dst)
        comp_size += pad
        pad = (pad + 0xf) / 0x10 * 0x10 - pad
        if pad > 0:
            #out.write(LZMA_PAD[0:pad])
            data += LZMA_PAD[0:pad]
            comp_size += pad
    data = struct.pack("I", size) + data
    print "compress %s %d->%d, block %d." % (sys.argv[2], size, comp_size, num_blocks)
    return data
def _pylzma_compress(self, data):
    """pylzma compression.

    dictionary
        Dictionary size (range 0-28, default 23 (8MB)).
        The maximum value for dictionary size is 256 MB = 2^28 bytes.
        Dictionary size is calculated as DictionarySize = 2^N bytes.
        For decompressing a file compressed by the LZMA method with
        dictionary size D = 2^N you need about D bytes of memory (RAM).

    fastBytes
        Range 5-255, default 128.
        Usually a big number gives a slightly better compression ratio
        and a slower compression process.

    literalContextBits
        Range 0-8, default 3.
        Sometimes literalContextBits=4 gives a gain for big files.

    literalPosBits
        Range 0-4, default 0.
        This switch is intended for periodical data when the period is
        equal to 2^N. For example, for 32-bit (4 byte) periodical data
        you can use literalPosBits=2. It is often better to set
        literalContextBits=0 if you change the literalPosBits switch.

    posBits
        Range 0-4, default 2.
        This switch is intended for periodical data when the period is
        equal to 2^N.

    algorithm
        Compression mode: 0 = fast, 1 = normal, 2 = max (default 2).
        The lower the number, the faster compression performs.

    multithreading
        Use multithreading if available? (default yes)
        Currently, multithreading is only available on Windows platforms.

    eos
        Should the end-of-stream marker be written? (default yes)
        You can save some bytes if the marker is omitted, but the total
        uncompressed size must then be stored by the application and
        used when decompressing.
    """
    return pylzma.compress(data, algorithm=self._compression_level)
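# A small sketch of the trade-offs the docstring above describes (the input
# file name is hypothetical): a lower algorithm and a smaller dictionary
# compress faster, while algorithm=2 with the defaults usually compresses smaller.
import pylzma

data = open('sample.bin', 'rb').read()  # hypothetical input

fast = pylzma.compress(data, algorithm=0, dictionary=16, fastBytes=32)
best = pylzma.compress(data, algorithm=2)  # defaults: dictionary=23, fastBytes=128

print('fast mode: %d bytes, max mode: %d bytes' % (len(fast), len(best)))
assert pylzma.decompress(best) == data  # eos defaults to on, so no size is needed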
def compress_stub(method, idata):
    # compress
    if method == 0:
        return 0, idata
    elif method == 14:  # M_LZMA
        import pylzma
        odata = pylzma.compress(idata, eos=0)
        ## FIXME: internal pylzma-0.3.0 error
        ##assert pylzma.decompress(odata, maxlength=len(idata)) == idata
        # recode lzma-header
        prop = ord(odata[0])
        pb = (prop / 9) / 5
        lp = (prop / 9) % 5
        lc = prop % 9
        h = chr(((lc + lp) << 3) | pb) + chr((lp << 4) | lc)
        odata = h + odata[5:]
        # encode upx stub header
        odata = encode_compressed_stub_header(method, idata, odata) + odata
    elif method == 15:  # M_DEFLATE
        odata = zlib.compress(idata, 9)
        # strip zlib-header and zlib-trailer (adler32)
        odata = odata[2:-4]
        assert zlib.decompress(odata, -15) == idata
        # encode upx stub header
        odata = encode_compressed_stub_header(method, idata, odata) + odata
    else:
        raise Exception, ("invalid method", method, opts.methods)
    if 1 and len(odata) >= len(idata):
        # not compressible
        return 0, idata
    assert len(odata) <= len(idata), "stub compression failed"
    return method, odata
def generateClientDB(self, realm, realmDB):
    try:
        # Copy the master DB to the client DB
        shutil.copyfile("./data/ahserver/ahmaster.db", "./data/ahserver/%s" % realmDB)
        # Connect to the client DB to flush out the extra tables we do not need to send over
        dbconn = sqlite.connect("./data/ahserver/%s" % realmDB, isolation_level=None)
        dcursor = dbconn.cursor()
        dcursor.execute("BEGIN TRANSACTION;")
        dcursor.execute("DELETE from ItemList WHERE id IN (select item_list_id FROM ItemTransactionDB WHERE realm != %d);" % realm)
        dcursor.execute("drop table ItemTransactionDB")
        dcursor.execute("drop table ItemInstance")
        dcursor.execute("drop table ItemCharacterMapping")
        dcursor.execute("drop table ItemVariant")
        dcursor.execute("END TRANSACTION;")
        dcursor.execute("vacuum ItemTransactionDB")
        dcursor.execute("vacuum ItemInstance")
        dcursor.execute("vacuum ItemCharacterMapping")
        dcursor.execute("vacuum ItemVariant")
        dcursor.close()
        dbconn.close()
        # Compress the client DB. Better to do this once every 10 seconds than
        # a bunch of times under heavy load of client requests.
        f = file("./data/ahserver/%s" % realmDB, "rb")
        cbuffer = f.read()
        cbuffer = pylzma.compress(cbuffer, algorithm=0)
        cbuffer = sqlite.Binary(cbuffer)
        f.close()
        f = file("./data/ahserver/%s" % realmDB, "wb")
        f.write(cbuffer)
        f.close()
    except:
        print "Failed to write %s to disk" % realmDB
def test_compression_decompression_noeos(self):
    # call compression and decompression on random data of various sizes
    for i in range(18):
        size = 1 << i
        original = generate_random(size)
        result = pylzma.decompress_compat(pylzma.compress(original, eos=0))[:size]
        self.assertEqual(md5(original).hexdigest(), md5(result).hexdigest())
def get_edit_binary(display, path, conf, compressed_config=True, debug=False):
    logger.debug("generating binary %s with conf: %s" % (path, conf))
    binary = b""
    with open(path, 'rb') as f:
        binary = f.read()
    i = 0
    offsets = []
    while True:
        i = binary.find("####---PUPY_CONFIG_COMES_HERE---####\n", i + 1)
        if i == -1:
            break
        offsets.append(i)
    if not offsets:
        raise Exception("Error: the offset to edit the config has not been found")
    elif len(offsets) > 1:
        raise Exception("Error: multiple offsets to edit the config have been found")
    config = get_raw_conf(display, conf)
    pupylib = dependencies.importer(('network', 'pupy'), path=ROOT, as_dict=True)
    new_conf = marshal.dumps([config, pupylib])
    logger.debug('First marshalled bytes: %s (total=%d)',
                 ' '.join('{:02x}'.format(ord(c)) for c in new_conf[:64]),
                 len(new_conf))
    uncompressed = len(new_conf)
    if compressed_config:
        new_conf = pylzma.compress(new_conf)
    compressed = len(new_conf)
    new_conf = struct.pack('>II', compressed, uncompressed) + new_conf
    new_conf_len = len(new_conf)
    if new_conf_len > HARDCODED_CONF_SIZE:
        raise Exception(
            'Error: config or offline script too long ({}/{} bytes). '
            'You need to recompile the dll with a bigger buffer'.format(
                new_conf_len, HARDCODED_CONF_SIZE))
    new_conf = new_conf + os.urandom(HARDCODED_CONF_SIZE - new_conf_len)
    logger.debug('Free space: %d', HARDCODED_CONF_SIZE - new_conf_len)
    offset = offsets[0]
    binary = binary[0:offset] + new_conf + binary[offset + HARDCODED_CONF_SIZE:]
    if binary[:2] == 'MZ':
        pe = pefile.PE(data=binary, fast_load=True)
        pe.OPTIONAL_HEADER.CheckSum = pe.generate_checksum()
        binary = pe.write()
    return binary
def compress_lzma(self, swf):
    'compress a SWF with LZMA'
    if type(swf) is str:
        swf = StringIO(swf)
    if self.lzma_installed is False:
        if self.show_errors:
            print "\t[ERROR] pylzma module not installed - aborting validation/decompression"
        return None
    try:
        signature = swf.read(3)
        if signature != 'FWS':
            if self.show_errors:
                print "\t[ERROR] FWS Header not found, aborting lzma compression"
            return None
        else:
            vfl = swf.read(5)
            # "ZWS" | version | len | compressed len | lzma compressed data
            # TEST
            import pylzma
            lzma_data = pylzma.compress(swf.read())
            return "ZWS" + vfl + struct.pack("<I", len(lzma_data) - 5) + lzma_data
    except:
        return None
def compress_lzma(self, swf):
    """compress a SWF with LZMA"""
    if isinstance(swf, bytes):
        swf = BytesIO(swf)
    if not HAVE_LZMA:
        if self.show_errors:
            print("\t[ERROR] pylzma module not installed - aborting validation/decompression")
        return None
    try:
        signature = swf.read(3)
        if signature != b'FWS':
            if self.show_errors:
                print("\t[ERROR] FWS Header not found, aborting lzma compression")
            return None
        else:
            vfl = swf.read(5)
            # "ZWS" | version | len | compressed len | lzma compressed data
            lzma_data = lzma.compress(swf.read())
            return b"ZWS" + vfl + struct.pack("<I", len(lzma_data) - 5) + lzma_data
    except:
        return None
def compress(input_path, output_path):
    gwf_file = gwf.File(input_path)
    with open(output_path, 'wb') as output_file:
        for record in gwf_file.read():
            compressed = pylzma.compress(record.serialize(), algorithm=compression_mode)
            write_block(compressed, record.ping_number, output_file)
def write_article():
    global compress
    global verbose
    global output, f_out, i_out
    global article_count
    global g_this_article_title
    global file_number

    article_count += 1
    if verbose:
        print "[MWR %d] %s" % (article_count, g_this_article_title)
        sys.stdout.flush()
    elif article_count % 1000 == 0:
        print "Render[%d]: %d" % (file_number, article_count)
        sys.stdout.flush()

    output.flush()

    # create link
    links_stream = io.BytesIO('')
    for i in g_links:
        (x0, y0, x1, y1, url) = g_links[i]
        links_stream.write(struct.pack('III', (y0 << 8) | x0, (y1 << 8) | x1, link_number(url)))
    links_stream.flush()
    links = links_stream.getvalue()
    links_stream.close()

    header = struct.pack('I2H', 8 + len(links), g_link_cnt, 0)
    body = output.getvalue()
    file_offset = f_out.tell()

    if compress:
        body = chr(5) + pylzma.compress(header + links + body,
                                        dictionary=24, fastBytes=32,
                                        literalContextBits=3, literalPosBits=0,
                                        posBits=2, algorithm=1, eos=1)
        f_out.write(body)
    else:
        f_out.write(header)
        f_out.write(links)
        f_out.write(body)
    output.truncate(0)

    if compress:
        try:
            (article_number, fnd_offset, restricted) = article_index(g_this_article_title)
            data_offset = (file_offset & 0x7fffffff)
            if bool(int(restricted)):  # '0' is True so turn it into False
                data_offset |= 0x80000000
            data_length = (0x80 << 24) | (file_number << 24) | len(body)  # 0x80 => lzma encoding
            i_out.write(struct.pack('III', data_offset, fnd_offset, data_length))
        except KeyError:
            print 'Error in: write_article, Title not found'
            print 'Title:', g_this_article_title
            print 'Offset:', file_offset
            print 'Count:', article_count
def LZMAzipfile(file):
    f = open(file, 'rb+')
    line = f.read()
    result = pylzma.compress(line)
    f.seek(0)
    f.truncate()
    f.write(result)
    f.close()
def test_compression_decompression_eos(self):
    # call compression and decompression on random data of various sizes
    for i in xrange(18):
        size = 1 << i
        original = generate_random(size)
        result = pylzma.decompress(pylzma.compress(original, eos=1))
        self.assertEqual(len(result), size)
        self.assertEqual(md5.new(original).hexdigest(), md5.new(result).hexdigest())
def test_matchfinders(self):
    # use different matchfinder algorithms for compression
    matchfinders = ['bt2', 'bt3', 'hc4']
    original = 'hello world'
    for mf in matchfinders:
        result = pylzma.decompress(pylzma.compress(original, matchfinder=mf))
        self.assertEqual(original, result)
    self.failUnlessRaises(TypeError, pylzma.compress, original, matchfinder='1234')
def lzma_algm(file_name):
    start_time = time.time()
    import pylzma
    outfilename = file_name + '.lzma'
    try:
        with open(file_name, 'rb') as f, open(outfilename, 'wb') as out:
            out.write(pylzma.compress(f.read()))
    finally:
        comprsn_details(file_name, outfilename, start_time)
def CompressData(data):
    return pylzma.compress(data, dictionary=24, fastBytes=32,
                           literalContextBits=3, literalPosBits=0,
                           posBits=2, algorithm=1, eos=1)
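# Since CompressData writes the end-of-stream marker (eos=1), a matching
# inverse needs no stored length; a minimal sketch:
def DecompressData(data):
    # the eos marker lets pylzma find the end of the stream by itself
    return pylzma.decompress(data)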
def depthCallback(self, dev, depth, timestamp):
    # resize grid
    depth0 = depth[self.useCols, self.useRows]

    # median of this + previous frames: reduces noise, and greatly improves
    # compression on similar frames
    if self.medianOf > 1:
        self.depths.insert(0, depth0)
        depth = numpy.median(numpy.dstack(self.depths), axis=2).astype(numpy.int16)
        self.depths.pop()
    else:
        depth = depth0

    # flip x axis so the orientation is correct
    depth = numpy.fliplr(depth)

    # rescale depths
    numpy.clip(depth, 0, 2 ** 10 - 1, depth)
    depth >>= 2

    # calculate quadrant averages (used to pan camera; could otherwise be done in JS)
    h, w = self.h, self.w
    halfH, halfW = h / 2, w / 2
    qtl = numpy.mean(depth[0:halfH, 0:halfW])
    qtr = numpy.mean(depth[0:halfH, halfW:w])
    qbl = numpy.mean(depth[halfH:h, 0:halfW])
    qbr = numpy.mean(depth[halfH:h, halfW:w])

    depth = depth.ravel()  # 1-D version

    # calculate diff from last frame (unless it's a keyframe)
    keyFrame = self.currentFrame == 0
    diffDepth = depth if keyFrame else depth - self.lastDepth

    # optionally produce pixel diffs (oddly, pixel diffing seems to *increase* compressed data size)
    if self.pixelDiffs:
        diffDepth = numpy.concatenate(([diffDepth[0]], numpy.diff(diffDepth)))

    # smush data together
    data = numpy.concatenate(([keyFrame, qtl, qtr, qbl, qbr], diffDepth % 256))

    # compress and broadcast
    crunchedData = pylzma.compress(data.astype(numpy.uint8), dictionary=18)  # default: 23 -> 2 ** 23 -> 8MB

    # write out test data
    # ff = open('/tmp/test_depth.bin', 'ab')
    # ff.write(crunchedData)
    # ff.close()

    reactor.callFromThread(self.wsFactory.broadcast, crunchedData, True)

    # setup for next frame
    self.lastDepth = depth
    self.currentFrame += 1
    self.currentFrame %= self.keyFrameEvery
def encode_post(post):
    mape = __pmap()
    np = dict()
    for k, v in post.items():
        nk = k
        if k in mape:
            nk = mape[k]
        np[nk] = v
    js = json.dumps(np)
    data = pylzma.compress(js)
    #log("Encoded post from %d to %d bytes (%.2f%% reduction)" % (len(js), len(data), 100.00 - (len(data) / len(js)) * 100.00))
    return data
def populate_queue(f_obj):
    global file_queue
    file_queue = Queue.Queue()
    cfile = StringIO(pylzma.compress(f_obj.read(), eos=1))
    cfile.seek(0, 2)
    cfile_size = cfile.tell()
    cfile.seek(0, 0)
    c_size = CHUNK_SIZE(cfile_size)
    while abs(cfile_size - cfile.tell()) / c_size > 1:
        rospy.loginfo("Compressing data packet")
        file_queue.put(cfile.read(c_size))
    file_queue.put(cfile.read())
def test_compress_large_stream_bigchunks(self):
    # decompress large block of repeating data, stream version with big chunks
    data = bytes("asdf", 'ascii') * 123456
    decompress = pylzma.decompressobj()
    infile = BytesIO(pylzma.compress(data))
    outfile = BytesIO()
    while 1:
        tmp = infile.read(1024)
        if not tmp:
            break
        outfile.write(decompress.decompress(tmp))
    outfile.write(decompress.flush())
    self.failUnless(data == outfile.getvalue())
def test_compress_large_stream_bigchunks(self):
    # decompress large block of repeating data, stream version with big chunks
    data = "asdf" * 123456
    decompress = pylzma.decompressobj()
    infile = StringIO(pylzma.compress(data))
    outfile = StringIO()
    while 1:
        tmp = infile.read(1024)
        if not tmp:
            break
        outfile.write(decompress.decompress(tmp))
    outfile.write(decompress.flush())
    self.failUnless(data == outfile.getvalue())
def test_compress_large_stream(self):
    # decompress large block of repeating data, stream version (bug reported by Christopher Perkins)
    data = "asdf" * 123456
    decompress = pylzma.decompressobj()
    infile = StringIO(pylzma.compress(data))
    outfile = StringIO()
    while 1:
        tmp = infile.read(1)
        if not tmp:
            break
        outfile.write(decompress.decompress(tmp))
    outfile.write(decompress.flush())
    self.failUnless(data == outfile.getvalue())
def test_compress_large_stream(self):
    # decompress large block of repeating data, stream version (bug reported by Christopher Perkins)
    data = bytes("asdf", 'ascii') * 123456
    decompress = pylzma.decompressobj()
    infile = BytesIO(pylzma.compress(data))
    outfile = BytesIO()
    while 1:
        tmp = infile.read(1)
        if not tmp:
            break
        outfile.write(decompress.decompress(tmp))
    outfile.write(decompress.flush())
    self.failUnless(data == outfile.getvalue())
def run(self):
    try:
        if self.compresslib == "lzma":
            self.datacompressed = pylzma.compress(self.data, algorithm=self.compressionlevel)
        elif self.compresslib == "zlib":
            self.datacompressed = zlib.compress(self.data, self.compressionlevel)
        elif self.compresslib == "bz2":
            self.datacompressed = bz2.compress(self.data, self.compressionlevel)
        elif self.compresslib == "none":
            self.datacompressed = self.data
    except:
        self.exception = True
        raise
def get(self):
    '''
    Compress the data and return it along with the time used.
    The first return value is the data, the second is the elapsed time.
    '''
    logging.info('start compressing')
    start_time = time.time()
    data = pylzma.compress(self.__file, fastBytes=self.__fb, eos=True)
    end_time = time.time()
    logging.info('compress complete, use time: {}, package size: {}m'.format(
        end_time - start_time, str(round(len(data) / 1024 / 1024, 3))))
    return data, end_time - start_time
def fileStatistics():
    '''
    Check and save all images' statistics
    '''
    files = os.listdir('databases/PNG/')
    fw = open('databases/fileStatistics.txt', 'w')
    number = len(files)
    fw.write('fileSize compSize URL\n')
    for i, f in enumerate(files):
        print '%4d/%4d\t%s' % (i, number, f)
        statinfo = os.stat('databases/PNG/%s' % f)
        fw.write('%-9d %-9d %s\n' % (statinfo.st_size,
                                     len(pylzma.compress(open('databases/PNG/%s' % f, 'rb').read())),
                                     f.replace('%E2', '/').replace('%3A', ':')[:-4]))
        pass  # for i, f in enumerate(files)
    fw.close()
def compress_lzma(self):
    self.flash.seek(8)
    self.lzma_compressed = pylzma.compress(self.flash.read1(self.file_factor))
    self.compressed_file_size = len(self.lzma_compressed) - 5
    lzma_file = io.BytesIO()
    lzma_file.write(b'ZWS')
    lzma_file.write(struct.pack("<B", self.version))
    lzma_file.write(struct.pack("<I", self.file_size))
    lzma_file.write(struct.pack("<I", self.compressed_file_size))
    lzma_file.write(self.lzma_compressed)
    lzma_file.seek(0)
    return lzma_file
def pickleSave(df, fname='data.pkl'):
    '''
    General routine for saving any data to be pickled with lzma compression.
    '''
    with open(fname, 'wb') as f:
        pickled = cPickle.dumps(df, protocol=-1)
        pickled = pylzma.compress(pickled, dictionary=26, fastBytes=255,
                                  literalContextBits=3, literalPosBits=0,
                                  posBits=2, algorithm=2, eos=1,
                                  multithreading=1)
        f.write(pickled)
def __setitem__(self, key, value):
    from vyperlogix.crypto.Encryptors import Encryptors
    from vyperlogix.misc import GenPasswd
    key = key if (misc.isString(key)) else str(key)
    if (self.has_key(key)) and (value == None):
        self.__delitem__(key)
    else:
        if (misc.isList(value)):
            value = [v if (not lists.can_asDict(v)) else v.asDict() for v in value]
        else:
            value = value if (not lists.can_asDict(value)) else value.asDict()
        if (self.isPickleMethodUseStrings):
            if (not misc.isString(value)):
                if not misc.isList(value):
                    value = list(value) if (not isinstance(value, dict)) else [(k, v) for k, v in value.iteritems()]
                val = ['|'.join([str(x) for x in v]) if isinstance(v, tuple) else str(v) for v in value]
                if (len(val) > 0):
                    value = ','.join(val) if (len(val) > 1) else val[0]
                else:
                    value = ''
        elif (self.isPickleMethodUseMarshal):
            value = strToHex(marshal.dumps(value, 2))
        elif (self.isPickleMethodUseBsdDbShelf):
            value = self.pickleItem(value)
        elif (self.isPickleMethodUseSafeSerializer):
            value = dumps(value, CompressionOption.compression)
        elif (self.isPickleMethodUseCerealizer):
            #cls = eval(ObjectTypeName.typeClassName(value))
            #if (not cerealizer.isRegistered(cls)):
            #    cerealizer.register(cls)
            value = cerealizer.dumps(value)
        if (not self.isPickleMethodUseSafeSerializer):
            if (self.isPickleMethodUseZLIB):
                value = zlib.compress(value, zlib.Z_BEST_COMPRESSION)
            elif (self.isPickleMethodUseLZMA):
                value = pylzma.compress(value, eos=1)
        self.__db[key] = value
def compress(infile, outfile):
    fi = open(infile, "rb")
    swf_size = os.path.getsize(infile)
    swf_data = fi.read()
    fi.close()
    validate((swf_data[1] == 'W') and (swf_data[2] == 'S'), "not a SWF file", 112)
    if swf_data[0] == 'Z':
        print "LZMA", outfile
        sys.exit(0)
    dfilesize = struct.unpack("<I", swf_data[4:8])[0] - 8
    if swf_data[0] == 'C':
        # compressed SWF
        ddata = zlib.decompress(swf_data[8:])
    else:
        # uncompressed SWF
        validate((swf_data[0] == 'F'), "not a SWF file", 113)
        ddata = swf_data[8:]
    validate((dfilesize == len(ddata)), 'decompression failure', 114)
    zdata = pylzma.compress(ddata, eos=1)
    # 5 accounts for lzma props
    zsize = len(zdata) - 5
    zheader = list(struct.unpack("<12B", swf_data[0:12]))
    zheader[0] = ord('Z')
    zheader[3] = 13
    zheader[8] = zsize & 0xFF
    zheader[9] = (zsize >> 8) & 0xFF
    zheader[10] = (zsize >> 16) & 0xFF
    zheader[11] = (zsize >> 24) & 0xFF
    fo = open(outfile, 'wb')
    fo.write(struct.pack("<12B", *zheader))
    fo.write(zdata)
    fo.close()
    opt_size = os.path.getsize(outfile)
    print "%6.2f%% %7.7sB " % (100 - (100.0 * opt_size / swf_size), kilo(swf_size - opt_size)) + outfile + ": ", kilo(swf_size) + " -> " + kilo(opt_size)
def ConvertMavlinkToTextMessage(self, ListOfMavlinkMessages):
    ######################################################################################
    #
    # Summary: Takes a list of mavlink messages, converts them to a single text buffer,
    # crushes the buffer size down with LZMA compression, encodes the compressed buffer
    # in Base64, and returns the encoded buffer. Base64 is used to make the text buffer
    # url/sms/email safe.
    #
    ######################################################################################
    BufferOfMavlinkMessages = ""
    for message in ListOfMavlinkMessages:
        MessageInASCII = binascii.hexlify(message.get_msgbuf())
        BufferOfMavlinkMessages += MessageInASCII
    CompressedMavlinkBuffer = pylzma.compress(BufferOfMavlinkMessages)
    EncodedMavlinkBuffer = base64.b64encode(CompressedMavlinkBuffer)
    TestMavlinkBuffer = base64.b64encode(BufferOfMavlinkMessages)  # debug
    return EncodedMavlinkBuffer
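# A sketch of the inverse path, assuming the same pipeline run in reverse
# (Base64 decode, LZMA decompress, unhexlify); parsing the raw bytes back
# into mavlink message objects is out of scope here.
import base64
import binascii
import pylzma

def ConvertTextMessageToMavlinkBytes(EncodedMavlinkBuffer):
    CompressedMavlinkBuffer = base64.b64decode(EncodedMavlinkBuffer)
    BufferOfMavlinkMessages = pylzma.decompress(CompressedMavlinkBuffer)
    # back to the raw concatenated message bytes
    return binascii.unhexlify(BufferOfMavlinkMessages)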
def compressXelapedia(source, dest):
    createXelapedia(dest)
    # connect with the database with the compressed articles
    con = sqlite.connect(dest)
    con.text_factory = str
    # connect the database with the uncompressed articles
    con.execute('ATTACH ? AS source', (source,))
    # update the configuration
    con.execute('UPDATE config SET value=\'lzma\' WHERE key=\'type\'')
    # empty the destination database
    con.execute('DELETE FROM articles')
    con.execute('DELETE FROM titles')
    con.execute('DELETE FROM redurects')
    # copy the titles
    con.execute('INSERT INTO titles(title, article_id) ' +
                'SELECT title, article_id FROM source.titles')
    con.commit()
    # we don't need the table attached directly anymore
    con.execute('DETACH source')
    conSource = sqlite.connect(source)
    conSource.text_factory = str
    # now copy and compress the articles
    #con.create_function('compress', 1, compressFunction)
    #con.execute('INSERT INTO articles(id, contents) ' +
    #            'SELECT id, compress(contents) FROM source.articles ORDER BY id')
    cur = conSource.execute('SELECT id, contents FROM articles ORDER BY id')
    for id, uncompressed in cur:
        compressed = Binary(compress(uncompressed))
        con.execute('INSERT INTO articles(id, contents) VALUES(?,?)', (id, compressed,))
        stdout.write('.')
        stdout.flush()
    con.commit()
def process(files, data):
    '''
    Process the data
    @param files: {List} file list
    @param data: {List} data list
    '''
    number = len(files)
    fResult = open('databases/results.txt', 'w')
    for i in range(number):
        for j in range(i + 1, number):
            step = {}
            # Calculate NCD
            clen1, clen2 = data[i]['clen'], data[j]['clen']
            step['ncd'] = (1.0 * (len(pylzma.compress(data[i]['byte'] + data[j]['byte'])) - min(clen1, clen2))
                           / max(clen1, clen2))
            # Calculate tree edit distance
            step['ted'] = zss.simple_distance(data[i]['etree'], data[j]['etree'])
            # Calculate color histogram distances
            hellingerDistance = 0.0
            bhattacharyyaDistance = 0.0
            totalVariationDistance = 0.0
            colors = list(set(data[i]['hist'].keys() + data[j]['hist'].keys()))
            for c in colors:
                count1, count2 = data[i]['hist'].get(c), data[j]['hist'].get(c)
                if count1 is None:
                    count1 = 0.0
                if count2 is None:
                    count2 = 0.0
                hellingerDistance += (count1 ** 0.5 - count2 ** 0.5) ** 2
                bhattacharyyaDistance += (count1 * count2) ** 0.5
                if totalVariationDistance < math.fabs(count1 - count2):
                    totalVariationDistance = math.fabs(count1 - count2)
                pass  # for c in colors
            hellingerDistance = (hellingerDistance / 2.0) ** 0.5
            bhattacharyyaDistance = -math.log(bhattacharyyaDistance)
            step['hist'] = [hellingerDistance, bhattacharyyaDistance, totalVariationDistance]
            # Write down results
            print 'Calculating: %4d, %4d / %4d' % (i, j, number)
            fResult.write('%4d,%4d:%s\n' % (i, j, step))
            pass  # for - for
    fResult.close()
def zip(inData, compression):
    if compression == 'lzma':
        check((inData[0] != 'Z'), "already LZMA compressed")
        rawSwf = unzip(inData)
        debug('Compressing with lzma')
        compressData = pylzma.compress(rawSwf[8:], eos=1)
        # 5 accounts for lzma props
        compressSize = len(compressData) - 5
        header = list(struct.unpack("<12B", inData[0:12]))
        header[0] = ord('Z')
        header[3] = header[3] >= 13 and header[3] or 13
        header[8] = compressSize & 0xFF
        header[9] = (compressSize >> 8) & 0xFF
        header[10] = (compressSize >> 16) & 0xFF
        header[11] = (compressSize >> 24) & 0xFF
        debug('Packing lzma header')
        headerBytes = struct.pack("<12B", *header)
    else:
        check((inData[0] != 'C'), "already zlib compressed")
        rawSwf = unzip(inData)
        debug('Compressing with zlib')
        compressData = zlib.compress(rawSwf[8:])
        compressSize = len(compressData)
        header = list(struct.unpack("<8B", inData[0:8]))
        header[0] = ord('C')
        header[3] = header[3] >= 6 and header[3] or 6
        debug('Packing zlib header')
        headerBytes = struct.pack("<8B", *header)
    debug('Generating compressed data')
    return headerBytes + compressData
def packHistogramCollection(collection, encode=True, compress=True):
    """
    Pack a collection of histograms
    """
    # Prepare header of the buffer
    buf = struct.pack("<BI", 1, len(collection))
    # Place histograms
    for h in collection:
        buf += packHistogram(h)
    # Compress if asked
    if compress:
        buf = pylzma.compress(buf)
    # Encode if asked
    if encode:
        buf = base64.b64encode(buf)
    # Return buffer
    return buf
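# A hedged sketch of the unpacking counterpart: the steps above reversed in
# the opposite order. Splitting the payload back into histograms would need a
# hypothetical unpackHistogram() mirroring packHistogram(), which is not shown.
import base64
import struct
import pylzma

def unpackHistogramCollection(buf, encoded=True, compressed=True):
    if encoded:
        buf = base64.b64decode(buf)
    if compressed:
        # the packer used pylzma's default eos marker, so no length is needed
        buf = pylzma.decompress(buf)
    version, count = struct.unpack("<BI", buf[:5])
    return version, count, buf[5:]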
def get_edit_binary(display, path, conf, compressed_config=True, debug=False):
    logger.debug("generating binary %s with conf: %s" % (path, conf))
    binary = b""
    with open(path, 'rb') as f:
        binary = f.read()
    i = 0
    offsets = []
    while True:
        i = binary.find("####---PUPY_CONFIG_COMES_HERE---####\n", i + 1)
        if i == -1:
            break
        offsets.append(i)
    if not offsets:
        raise Exception("Error: the offset to edit the config has not been found")
    elif len(offsets) > 1:
        raise Exception("Error: multiple offsets to edit the config have been found")
    new_conf = marshal.dumps(compile(get_raw_conf(display, conf), '<config>', 'exec'))
    uncompressed = len(new_conf)
    if compressed_config:
        new_conf = pylzma.compress(new_conf)
    compressed = len(new_conf)
    new_conf = struct.pack('>II', compressed, uncompressed) + new_conf
    new_conf_len = len(new_conf)
    if new_conf_len > HARDCODED_CONF_SIZE:
        raise Exception(
            'Error: config or offline script too long ({}/{} bytes). '
            'You need to recompile the dll with a bigger buffer'.format(new_conf_len, HARDCODED_CONF_SIZE)
        )
    new_conf = new_conf + os.urandom(HARDCODED_CONF_SIZE - new_conf_len)
    logger.debug('Free space: %d', HARDCODED_CONF_SIZE - new_conf_len)
    offset = offsets[0]
    binary = binary[0:offset] + new_conf + binary[offset + HARDCODED_CONF_SIZE:]
    return binary
def convert(infile, outfile):
    fi = open(infile, "rb")
    swf_size = os.path.getsize(infile)
    swf_data = fi.read()
    fi.close()
    check((swf_data[1] == 'W') and (swf_data[2] == 'S'), "not a SWF file")
    check((ord(swf_data[3]) >= 13), "only SWF version 13 or higher is supported")
    check((swf_data[0] != 'Z'), "already LZMA compressed")
    dfilesize = struct.unpack("<I", swf_data[4:8])[0] - 8
    if swf_data[0] == 'C':
        # compressed SWF
        ddata = zlib.decompress(swf_data[8:])
    else:
        # uncompressed SWF
        check((swf_data[0] == 'F'), "not a SWF file")
        ddata = swf_data[8:]
    check((dfilesize == len(ddata)), 'decompression failure')
    zdata = pylzma.compress(ddata, eos=1)
    # 5 accounts for lzma props
    zsize = len(zdata) - 5
    zheader = list(struct.unpack("<12B", swf_data[0:12]))
    zheader[0] = ord('Z')
    zheader[8] = zsize & 0xFF
    zheader[9] = (zsize >> 8) & 0xFF
    zheader[10] = (zsize >> 16) & 0xFF
    zheader[11] = (zsize >> 24) & 0xFF
    fo = open(outfile, 'wb')
    fo.write(struct.pack("<12B", *zheader))
    fo.write(zdata)
    fo.close()
    print 'compression: %d%%' % round(100 - (100.0 * zsize / swf_size))
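# To sanity-check the output, the 12-byte ZWS header written above can be read
# back with plain struct parsing; a small sketch (the file name is hypothetical):
import struct

with open('movie.swf', 'rb') as f:  # hypothetical output of convert()
    hdr = f.read(12)

sig = hdr[0:3]                                       # 'ZWS'
version = struct.unpack('<B', hdr[3:4])[0]           # SWF version, >= 13
uncompressed_len = struct.unpack('<I', hdr[4:8])[0]  # original size incl. 8-byte header
compressed_len = struct.unpack('<I', hdr[8:12])[0]   # LZMA payload size minus 5 props bytes
print('%s v%d: %d -> %d bytes' % (sig, version, uncompressed_len, compressed_len))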
def LzmaEnc(binStr, dictionary=23, fastBytes=128, algorithm=2, matchfinder=2,
            literalContextBits=3, literalPosBits=0, posBits=2):
    # parameters
    dictionary = int(min(max(dictionary, 0), 26))  # some machines support up to 27, but to be safe set to 26
    fastBytes = int(min(max(fastBytes, 0), 256))
    algorithm = int(min(max(algorithm, 0), 2))
    matchfinder = ['bt2', 'bt3', 'bt4', 'bt4b', 'pat2r', 'pat2', 'pat2h',
                   'pat3h', 'pat4h', 'hc3', 'hc4'][int(min(max(matchfinder, 0), 10))]
    literalContextBits = int(min(max(literalContextBits, 0), 8))
    literalPosBits = int(min(max(literalPosBits, 0), 4))
    posBits = int(min(max(posBits, 0), 4))
    # convert bit string into real binary data
    orgdata = BitArray(bin=binStr).tobytes()
    # compress the bin data
    try:
        cmpdata = compress(orgdata, dictionary, fastBytes, literalContextBits,
                           literalPosBits, posBits, algorithm, matchfinder=matchfinder)
    except:
        GlobalMsg.panic('LZMA internal error, compression failed')
    # output: compressed data, original binary length, original bit string length
    return cmpdata, len(orgdata), len(binStr)
def get_edit_binary(path, conf):
    logging.debug("generating binary %s with conf: %s" % (path, conf))
    binary = b""
    with open(path, 'rb') as f:
        binary = f.read()
    i = 0
    offsets = []
    while True:
        i = binary.find("####---PUPY_CONFIG_COMES_HERE---####\n", i + 1)
        if i == -1:
            break
        offsets.append(i)
    if not offsets:
        raise Exception("Error: the offset to edit the config has not been found")
    elif len(offsets) > 1:
        raise Exception("Error: multiple offsets to edit the config have been found")
    new_conf = marshal.dumps(compile(get_raw_conf(conf), '<string>', 'exec'))
    uncompressed = len(new_conf)
    new_conf = pylzma.compress(new_conf)
    compressed = len(new_conf)
    new_conf = struct.pack('>II', compressed, uncompressed) + new_conf
    new_conf_len = len(new_conf)
    if new_conf_len > HARDCODED_CONF_SIZE:
        raise Exception(
            'Error: config or offline script too long ({}/{} bytes). '
            'You need to recompile the dll with a bigger buffer'.format(new_conf_len, HARDCODED_CONF_SIZE)
        )
    new_conf = new_conf + os.urandom(HARDCODED_CONF_SIZE - new_conf_len)
    offset = offsets[0]
    binary = binary[0:offset] + new_conf + binary[offset + HARDCODED_CONF_SIZE:]
    return binary
def save(self, path_stack, compression="gzip"):
    """
    Save Stack instance to .stack file.

    Parameters
    ----------
    path_stack : str
        The full path to the .stack file that should be created,
        including the extension.
    compression : {'gzip', 'lzma'}, default 'gzip'
        The intended compression type. 'lzma' offers high compression
        but can be very slow.

    Returns
    -------
    None
    """
    protocol = cPickle.HIGHEST_PROTOCOL
    if not path_stack.endswith('.stack'):
        raise ValueError(
            "To avoid ambiguity, when using Stack.save() you must provide the full path to "
            "the stack file you want to create, including the file extension. For example: "
            "stack.save(path_stack='./output/MyStack.stack'). Your call looks like this: "
            "stack.save(path_stack='%s', ...)" % (path_stack)
        )
    if compression is None:
        f = open(path_stack, 'wb')
        cPickle.dump(self, f, protocol)
    elif compression.lower() == "lzma":
        f = open(path_stack, 'wb')
        cPickle.dump(pylzma.compress(bytes(self)), f, protocol)
    else:
        f = gzip.open(path_stack, 'wb')
        cPickle.dump(self, f, protocol)
    f.close()
####################################
# Fix the FileLength header
uncompressedLength = o.tell()
o.seek(4)
o.write(struct.pack("I", uncompressedLength))
o.flush()
o.seek(0)

# Copy the temp file to the outFile, compressing if necessary
outFile = open(infile, "wb")
if signature == "FWS":
    shutil.copyfileobj(o, outFile)
else:
    outFile.write(o.read(8))
    # File is compressed after header
    if signature == "CWS":
        outFile.write(zlib.compress(o.read()))
    elif signature == "ZWS":
        compressed = pylzma.compress(o.read())
        outputInt(outFile, len(compressed) - 5)  # LZMA SWF has a CompressedLength header field
        outFile.write(compressed)
    else:
        assert(False)
outFile.close()

if passwordClear:
    print("Added opt-in flag with encrypted password " + passwordClear)
else:
    print("Added opt-in flag with no password")
if Compression == 1:
    NewFile += "CWS"
elif Compression == 2:
    NewFile += "ZWS"
NewFile += fCon[3]
inFLen_pak = struct.pack("L", inFLen)
NewFile += inFLen_pak
if inFLen > 8:
    DecompCon = fCon[8:]
    T = ""
    if Compression == 1:
        T = zlib.compress(DecompCon)
    elif Compression == 2:
        T = pylzma.compress(DecompCon)
        len_T = len(T) - 5  # 5-byte properties of the LZMA header
        s_len_T = struct.pack("L", len_T)
        NewFile += s_len_T
    if T != "":
        NewFile += T
outFileName = ""
if Compression == 1:
    outFileName = "zlib_compressed.swf"
elif Compression == 2:
    outFileName = "lzma_compressed.swf"
fOut = open(outFileName, "wb")
fOut.write(NewFile)
fOut.close()
print "Compressed file was written to " + outFileName
sys.exit(0)
def depthCallback(self, dev, depth, timestamp):
    # resize grid
    depth0 = depth[self.useCols, self.useRows]

    """
    # manual frame medianing -- v slow
    h, w, u = self.h, self.w, self.useEvery
    depth0 = numpy.empty(shape = (h, w))
    medianIdx = u ** 2 / 2
    for y in range(0, h):
        for x in range(0, w):
            yOff, xOff = y * u, x * u
            box = depth[yOff : yOff + u, xOff : xOff + u]
            depth0[y, x] = numpy.sort(box.reshape(-1))[medianIdx]
    """

    """
    # less manual frame medianing -- also v slow -- needs w of 640, not 632
    h, w, u = self.h, self.w, self.useEvery
    medianLen = u ** 2
    medianIdx = medianLen / 2
    depth0 = numpy.sort(numpy.array(numpy.hsplit(numpy.array(numpy.hsplit(depth, w)), h)).reshape(-1, medianLen))[..., medianIdx].reshape(h, w)
    """

    # median of this + previous frames: reduces noise, and greatly improves
    # compression on similar frames
    if self.medianOf > 1:
        self.depths.insert(0, depth0)
        depth = numpy.median(numpy.dstack(self.depths), axis=2).astype(numpy.int16)
        self.depths.pop()
    else:
        depth = depth0

    # rescale depths
    numpy.clip(depth, 0, 2 ** 10 - 1, depth)
    depth >>= 2

    # calculate quadrant averages (used to pan camera; could otherwise be done in JS)
    h, w = self.h, self.w
    halfH, halfW = h / 2, w / 2
    qtl = numpy.mean(depth[0:halfH, 0:halfW])
    qtr = numpy.mean(depth[0:halfH, halfW:w])
    qbl = numpy.mean(depth[halfH:h, 0:halfW])
    qbr = numpy.mean(depth[halfH:h, halfW:w])

    depth = depth.ravel()  # 1-D version

    # calculate diff from last frame (unless it's a keyframe)
    keyFrame = self.currentFrame == 0
    diffDepth = depth if keyFrame else depth - self.lastDepth

    # optionally produce pixel diffs (oddly, pixel diffing seems to *increase* compressed data size)
    if self.pixelDiffs:
        diffDepth = numpy.concatenate(([diffDepth[0]], numpy.diff(diffDepth)))

    # smush data together
    data = numpy.concatenate(([keyFrame, qtl, qtr, qbl, qbr], diffDepth % 256))

    # compress and broadcast
    crunchedData = pylzma.compress(data.astype(numpy.uint8), dictionary=18)  # default: 23 -> 2 ** 23 -> 8MB
    reactor.callFromThread(self.wsFactory.broadcast, crunchedData, True)

    # setup for next frame
    self.lastDepth = depth
    self.currentFrame += 1
    self.currentFrame %= self.keyFrameEvery
def test_compression_no_eos(self):
    # test compression without end of stream marker
    compressed = pylzma.compress(self.plain, eos=0)
    self.assertEqual(compressed, self.plain_without_eos)
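# The counterpart used by the noeos tests above: without the end-of-stream
# marker the caller must supply the uncompressed size via maxlength.
data = b'hello world' * 100
compressed = pylzma.compress(data, eos=0)
restored = pylzma.decompress(compressed, maxlength=len(data))
assert restored == data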
swf_bytearray[stage264_offset + XOR_OFFT64 + 0] = hex_xorkey[6]
swf_bytearray[stage264_offset + XOR_OFFT64 + 1] = hex_xorkey[7]
swf_bytearray[stage264_offset + XOR_OFFT64 + 2] = hex_xorkey[4]
swf_bytearray[stage264_offset + XOR_OFFT64 + 3] = hex_xorkey[5]
swf_bytearray[stage264_offset + XOR_OFFT64 + 4] = hex_xorkey[2]
swf_bytearray[stage264_offset + XOR_OFFT64 + 5] = hex_xorkey[3]
swf_bytearray[stage264_offset + XOR_OFFT64 + 6] = hex_xorkey[0]
swf_bytearray[stage264_offset + XOR_OFFT64 + 7] = hex_xorkey[1]

# compress swf
uncompressed_len = len(swf_bytearray)
uncompressed_len += len("ZWS\x0d")
uncompressed_len += 4  # plus the length field itself
print "[+] Uncompressed len: 0x%x" % (uncompressed_len)

lzma_buff = pylzma.compress(byteArray2String(swf_bytearray))
compressed_len = len(lzma_buff) - 5
print "[+] Compressed len: 0x%x" % (compressed_len)

output_buff = "ZWS\x0d"
output_buff += struct.pack("<L", uncompressed_len)
output_buff += struct.pack("<L", compressed_len)
output_buff += lzma_buff

# write it
open(SWF_RANDOM_NAME, 'wb').write(output_buff)

# modify ole link
ole_link_buff = open("tmp/word/activeX/activeX1.bin", 'rb').read()
ole_link_offt = ole_link_buff.find("h\x00t\x00t\x00p")
print "[+] Offset to first link: 0x%x" % (ole_link_offt)
def compress(self, data):
    # drop the 5-byte LZMA properties header from the stream
    return pylzma.compress(data)[5:]
def test_compress_large_string(self):
    # decompress large block of repeating data, string version (bug reported by Christopher Perkins)
    data = bytes("asdf", 'ascii') * 123456
    compressed = pylzma.compress(data)
    self.failUnless(data == pylzma.decompress(compressed))
def _file_lzma(cls, data, header=RAW):
    lzma_data = pylzma.compress(data)
    return cls.LZMA + header + str(len(lzma_data)) + cls.PADDING + lzma_data