def execute(arguments):
    """Dispatch a parsed CLI request to the lzw or elias codec.

    `arguments` is the parsed argument mapping; module-level option keys
    select the command (compress/decompress), codec, files and code type.
    """
    _validate_arguments(arguments)
    _enrich_arguments(arguments)

    is_compress = arguments[_compress_command]
    source = arguments[_read_file_parameter]
    target = arguments[_write_file_parameter]
    use_lzw = arguments[_lzw_option]
    use_elias = arguments[_elias_option]
    code = arguments[_code_option]

    if is_compress:
        if use_lzw:
            lzw.compress(source, target)
        elif use_elias:
            elias.compress(source, target, code_type=code)
    else:
        if use_lzw:
            lzw.decompress(source, target)
        elif use_elias:
            elias.decompress(source, target, code_type=code)
def calcular_distancia(buscados, comprimidos):
    # Walk every file in directory `buscados`, compress it, and pair it with
    # each pre-compressed file in directory `comprimidos` — groundwork for a
    # compression-based distance (the X / C(X) / C(Y) labels suggest NCD).
    # NOTE(review): the loop bodies only compute values and never accumulate
    # or return anything — presumably truncated or work in progress; confirm
    # against the original project before relying on this.
    for archivo in os.listdir(buscados):
        bytes_archivo = lzw.readbytes(os.path.abspath(buscados + "/" + archivo))  # X
        archivo_comprimido = lzw.compress(bytes_archivo)  # C(X)
        for comprimido in os.listdir(comprimidos):
            bytes_comprimido = lzw.readbytes(os.path.abspath(comprimidos + "/" + comprimido))  # C(Y)
            bytes_descomprimido = lzw.decompress(bytes_comprimido)  # C(Y)
def decompress(option, encoded_file, decoded_file):
    # Decode `encoded_file` into `decoded_file` using the scheme selected by
    # `option`: "1" = stored (no compression), "2" = custom bit-packed prefix
    # code, "3" = LZW, "4" = arithmetic coding (arcode).
    # Python 2 code: print statements, long(). Option "2" relies on the
    # module globals byteArr/bitPosition shared with bitReader().
    global byteArr
    global bitPosition
    if option == "1":
        print "no compression"
        file = open(decoded_file, "w")
        file.write(open(encoded_file, "r").read())
        file.close()
    if option == "2":
        # read the whole input file into a byte array
        fileSize = os.path.getsize(str(os.path.abspath((encoded_file))))
        fi = open(encoded_file, "rb")
        # byteArr = map(ord, fi.read(fileSize))
        byteArr = bytearray(fi.read(fileSize))
        fi.close()
        fileSize = len(byteArr)
        print "File size in bytes:", fileSize
        bitPosition = 0
        # first read the number of encoding tuples
        n = int(bitReader(8), 2) + 1
        # print 'Number of encoding tuples:', n
        dic = dict()
        for i in range(n):
            # read the byteValue
            byteValue = int(bitReader(8), 2)
            # read 3-bit(len(encodingBitStr)-1) value
            m = int(bitReader(3), 2) + 1
            # read encodingBitStr
            encodingBitStr = bitReader(m)
            dic[encodingBitStr] = byteValue  # add to the dictionary
        # print 'The dictionary of encodingBitStr : byteValue pairs:'
        # print dic
        # print
        # read 32-bit file size (number of encoded bytes) value
        numBytes = long(bitReader(32), 2) + 1
        print "Number of bytes to decode:", numBytes
        # read the encoded data, decode it, write into the output file
        fo = open(decoded_file, "wb")
        for b in range(numBytes):
            # read bits until a decoding match is found
            encodingBitStr = ""
            while True:
                encodingBitStr += bitReader(1)
                if encodingBitStr in dic:
                    byteValue = dic[encodingBitStr]
                    fo.write(chr(byteValue))
                    break
        fo.close()
    if option == "3":
        newbytes = b"".join(lzw.decompress(lzw.readbytes(encoded_file)))
        decoded = open(decoded_file, "w")
        decoded.write(newbytes)
        # NOTE(review): __sizeof__() reports object overhead, not payload
        # length — len(newbytes) is presumably what was meant; confirm.
        print "LZW decoding num of bytes: " + str(newbytes.__sizeof__())
    if option == "4":
        ar = arcode.ArithmeticCode(False)
        ar.decode_file(encoded_file, decoded_file)
def verify_compressed_file(self, testfile=GIANT_FILE):
    """Round-trip `testfile` through lzw via a temporary file and assert the
    decompressed stream is byte-identical to the original.

    Fixes: `self.assertEquals` is a deprecated alias (removed in Python
    3.12) — use `assertEqual`; dropped the never-read `originalsize`,
    `compressedsize` and pre-loop `uncompressedsize` bookkeeping.
    """
    with tempfile.TemporaryFile("w+b") as compressedfile:
        bigstream = lzw.readbytes(testfile)
        for bs in lzw.compress(bigstream):
            compressedfile.write(bs)

        # Rewind and compare against a fresh read of the source file;
        # zip_longest also catches length mismatches (one side yields None).
        compressedfile.flush()
        compressedfile.seek(0)
        checkstream = lzw.readbytes(testfile)
        uncompressed = lzw.decompress(lzw.filebytes(compressedfile))
        position = 0
        for oldbyte, newbyte in six.moves.zip_longest(checkstream, uncompressed):
            position += 1
            if oldbyte != newbyte:
                msg = "Corrupted byte at {0}, original {1} != {2}".format(
                    position, oldbyte, newbyte)
                self.assertEqual(oldbyte, newbyte, msg)
def verify_compressed_file(self, testfile=GIANT_FILE):
    """Round-trip `testfile` through lzw via a temporary file and assert the
    decompressed stream is byte-identical to the original.

    Fixes: `self.assertEquals` is a deprecated alias (removed in Python
    3.12) — use `assertEqual`; dropped the never-read `originalsize`,
    `compressedsize` and pre-loop `uncompressedsize` bookkeeping.
    """
    with tempfile.TemporaryFile("w+b") as compressedfile:
        bigstream = lzw.readbytes(testfile)
        for bs in lzw.compress(bigstream):
            compressedfile.write(bs)

        # Rewind and compare against a fresh read of the source file;
        # zip_longest also catches length mismatches (one side yields None).
        compressedfile.flush()
        compressedfile.seek(0)
        checkstream = lzw.readbytes(testfile)
        uncompressed = lzw.decompress(lzw.filebytes(compressedfile))
        position = 0
        for oldbyte, newbyte in six.moves.zip_longest(checkstream, uncompressed):
            position += 1
            if oldbyte != newbyte:
                msg = "Corrupted byte at {0}, original {1} != {2}".format(
                    position, oldbyte, newbyte)
                self.assertEqual(oldbyte, newbyte, msg)
def pic_from_data(self, data):
    # Parse a picture blob: a 5-byte little-endian header (width, height,
    # mode via struct format 'HHB'), followed by LZW-compressed RLE pixel
    # data. Populates self.width/height/mode and self.pic (list of rows).
    # Python 2 code: print statement, xrange, map(ord, ...) over a str.
    self.width, self.height, self.mode = unpack('HHB', data[0:5])
    rle_data = lzw.decompress(map(ord, data[5:]), self.mode)
    data = rle.decode(rle_data)
    self.pic = []
    # Split the flat decoded buffer into `height` rows of `width` entries.
    for y in xrange(0, self.height):
        self.pic.append(data[y*self.width:y*self.width + self.width])
    print 'Pic read', self.width, 'x', self.height, '(', self.mode, ')'
def test_compressdecompress(self):
    """Both sample corpora must survive a compress/decompress round trip."""
    for original in (self.english, self.gibberish):
        chunks = list(lzw.compress(original))
        restored = b"".join(lzw.decompress(chunks))
        self.assertEqual(original, restored)
def post_dissect(self, s):
    # Undo HTTP transfer/content encodings on the raw body `s` so upper
    # layers see the plain payload. Disabled entirely by the
    # conf.contribs["http"]["auto_compression"] switch.
    if not conf.contribs["http"]["auto_compression"]:
        return s
    encodings = self._get_encodings()
    # Un-chunkify: reassemble "chunked" transfer encoding. On any malformed
    # chunk we bail out and keep the original body (s left non-empty).
    if "chunked" in encodings:
        data = b""
        while s:
            length, _, body = s.partition(b"\r\n")
            try:
                length = int(length, 16)
            except ValueError:
                # Not a valid chunk. Ignore
                break
            else:
                load = body[:length]
                if body[length:length + 2] != b"\r\n":
                    # Invalid chunk. Ignore
                    break
                s = body[length + 2:]
                data += load
        # All chunks consumed cleanly -> replace body with reassembly.
        if not s:
            s = data
    # Decompress according to the first matching content encoding.
    try:
        if "deflate" in encodings:
            import zlib
            s = zlib.decompress(s)
        elif "gzip" in encodings:
            s = gzip_decompress(s)
        elif "compress" in encodings:
            import lzw
            s = lzw.decompress(s)
        elif "br" in encodings:
            if _is_brotli_available:
                s = brotli.decompress(s)
            else:
                log_loading.info(
                    "Can't import brotli. brotli decompression "
                    "will be ignored !")
        elif "zstd" in encodings:
            if _is_zstd_available:
                # Using its streaming API since its simple API could handle
                # only cases where there is content size data embedded in
                # the frame
                bio = io.BytesIO(s)
                reader = zstandard.ZstdDecompressor().stream_reader(bio)
                s = reader.read()
            else:
                log_loading.info(
                    "Can't import zstandard. zstd decompression "
                    "will be ignored !")
    except Exception:
        # Cannot decompress - probably incomplete data
        pass
    return s
def extract_config(lzw_file, zyxel_file, r):
    """Strip the fixed 60-byte header from the downloaded response `r`, save
    the LZW payload to `lzw_file`, then decompress it into `zyxel_file`.

    Decompression stays best-effort (the router may return garbage), but
    failures are now reported instead of silently swallowed by a bare
    ``except: pass``.
    """
    header_size = 60  # fixed-size header preceding the LZW-compressed config
    print("[+] Stripping header")
    with open(lzw_file, 'wb') as f:
        r.raw.decode_content = True
        f.write(r.content[header_size:])
    try:
        mybytes = lzw.readbytes(lzw_file)
        lbytes = lzw.decompress(mybytes)
        lzw.writebytes(zyxel_file, lbytes)
    except Exception as exc:  # best-effort, but tell the operator why
        print("[-] LZW decompression failed: {0}".format(exc))
def post_dissect(self, s):
    # Undo HTTP transfer/content encodings on the raw body `s` so upper
    # layers see the plain payload. Disabled entirely by the
    # conf.contribs["http"]["auto_compression"] switch.
    if not conf.contribs["http"]["auto_compression"]:
        return s
    encodings = self._get_encodings()
    # Un-chunkify: reassemble "chunked" transfer encoding. On any malformed
    # chunk we bail out and keep the original body (s left non-empty).
    if "chunked" in encodings:
        data = b""
        while s:
            length, _, body = s.partition(b"\r\n")
            try:
                length = int(length, 16)
            except ValueError:
                # Not a valid chunk. Ignore
                break
            else:
                load = body[:length]
                if body[length:length + 2] != b"\r\n":
                    # Invalid chunk. Ignore
                    break
                s = body[length + 2:]
                data += load
        # All chunks consumed cleanly -> replace body with reassembly.
        if not s:
            s = data
    # Decompress according to the first matching content encoding.
    try:
        if "deflate" in encodings:
            import zlib
            s = zlib.decompress(s)
        elif "gzip" in encodings:
            s = gzip_decompress(s)
        elif "compress" in encodings:
            import lzw
            s = lzw.decompress(s)
        elif "br" in encodings:
            if _is_brotli_available:
                s = brotli.decompress(s)
            else:
                log_loading.info(
                    "Can't import brotli. brotli decompression "
                    "will be ignored !"
                )
    except Exception:
        # Cannot decompress - probably incomplete data
        pass
    return s
def test_compress_decompress_2():
    """A string of repeated digrams survives a compress/decompress round trip."""
    original = "rererere"
    encoded, _, dictionary = compress(original)
    restored = decompress(encoded, dictionary)
    assert restored == original
def read_tiff(filename):
    # Read a single-directory, LZW-compressed (compression tag 5),
    # float-predictor (predictor tag 3) TIFF and return the pixels as a
    # (height, width, samplesperpixel) float32 numpy array.  Anything
    # outside that narrow subset trips an assert.  Legacy Python 2 /
    # old-numpy code: true division would break the byte counts on
    # Python 3, and fromstring/tostring are deprecated numpy APIs.
    with open(filename, "rb") as tifffile:
        header = tifffile.read(4)
        assert header == TIFF_HEADER, "TIFF header not found"
        directorystart = read_uint32(tifffile)
        log.debug("directory start at 0x%x", directorystart)
        tifffile.seek(directorystart)
        directorylength = read_uint16(tifffile)
        log.debug("directory length: %d", directorylength)
        directory = {}
        # Parse each IFD entry: tag, value type, count, then either an
        # inline value (<= 4 bytes) or a pointer to the value data.
        for i in range(directorylength):
            tifffile.seek(directorystart + 2 + DIRECTORY_ENTRY_LENGTH * i)
            tag = read_uint16(tifffile)
            valuetype = read_uint16(tifffile)
            length = read_uint32(tifffile)
            typelen = VALUETYPE[valuetype][1]
            if length * typelen > 4:
                # Value does not fit inline — follow the offset.
                pointer = read_uint32(tifffile)
                tifffile.seek(pointer)
            raw_values = struct.unpack("<" + length * VALUETYPE[valuetype][0],
                                       tifffile.read(length * typelen))
            values = VALUETYPE[valuetype][2](raw_values)
            assert tag in __FIELD_BY_ID_MAP, "Tag 0x%x not found" % tag
            fielddata = __FIELD_BY_ID_MAP[tag]
            value = fielddata[2](values)
            # Optional 4th element of the field spec is a whitelist of
            # acceptable values.
            if len(fielddata) == 4:
                assert value in fielddata[3], \
                    "Value not acceptable: %s %s" % (fielddata[0], repr(value))
            directory[fielddata[0]] = value
            log.debug("Found tag: %s = %s", fielddata[0], repr(value))
        # Sanity-check the directory: per-channel arrays match the channel
        # count, strip bookkeeping matches the image height.
        assert len(directory["bitspersample"]) == directory["samplesperpixel"]
        assert len(directory["sampleformat"]) == directory["samplesperpixel"]
        nrstrips = int(math.ceil(float(directory["height"]) / directory["rowsperstrip"]))
        assert len(directory["stripoffsets"]) == nrstrips
        assert len(directory["stripbytecounts"]) == nrstrips
        notpresentfields = set([x[0] for x in FIELD]) - set(directory.keys())
        for fieldname in notpresentfields:
            assert fieldname in optionalfields, "Field %s missing" % fieldname
        imageasstring = ""
        # read the strips
        for stripnr in range(nrstrips):
            # Last strip may be shorter than rowsperstrip.
            nrrows = min(directory["height"] - directory["rowsperstrip"] * stripnr,
                         directory["rowsperstrip"])
            nrpixels = directory["width"] * nrrows
            nrbytes = nrpixels * sum(directory["bitspersample"]) / 8
            tifffile.seek(directory["stripoffsets"][stripnr])
            assert directory["compression"] == 5
            # lzw
            lzwstrip = tifffile.read(directory["stripbytecounts"][stripnr])
            predictedstrip = numpy.fromstring(lzw.decompress(lzwstrip),
                                              dtype=numpy.uint8)
            assert len(predictedstrip) == nrbytes
            assert directory["predictor"] == 3
            # float predictor
            # undo prediction in two steps:
            # first add the value or the previous 4 columns to the next
            # over the whole strip (should probably optimise)
            width = directory["width"]
            nrchannels = directory["samplesperpixel"]
            bytespersample = directory["bitspersample"][0] / 8
            assert len(set(directory["bitspersample"])) == 1, \
                "Images with different number of bits per sample per channel" \
                " not supported"
            cumsummedstrip = (predictedstrip.reshape(
                (nrrows, width * bytespersample, nrchannels)).
                cumsum(1, dtype=numpy.uint8))
            # then re-arrange the value. if the width is 640 and 4 float32 per
            # pixel
            strip = (cumsummedstrip.reshape(
                # reverse the order of columns, to go from bit to little endian
                (nrrows, bytespersample, width * nrchannels))[:, ::-1, :].
                # transpose in the second dimension to descramble the parts
                # of the individual floats
                transpose(0, 2, 1).flatten().tostring())
            imageasstring += strip
        flatimage = numpy.fromstring(imageasstring, dtype=numpy.float32)
        return flatimage.reshape((directory["height"], directory["width"],
                                  directory["samplesperpixel"]))
def lzwDecode(stream, parameters):
    # LZW-decode a PDF stream and optionally undo a PNG-style predictor
    # declared in the filter parameters dictionary.  Returns (0, data) on
    # success or (-1, message) on failure.  Python 2 code (has_key, xrange,
    # str-as-bytes).
    decodedStream = ''
    try:
        generator = lzw.decompress(stream)
        for c in generator:
            decodedStream += c
    except:
        return (-1,'Error decompressing string')
    if parameters == None or parameters == {}:
        return (0,decodedStream)
    else:
        # Pull the optional filter parameters; each may be absent.
        if parameters.has_key('/Predictor'):
            predictor = parameters['/Predictor'].getRawValue()
        else:
            predictor = None
        if parameters.has_key('/Columns'):
            columns = parameters['/Columns'].getRawValue()
        else:
            columns = None
        if parameters.has_key('/Colors'):
            colors = parameters['/Colors'].getRawValue()
        else:
            colors = None
        if parameters.has_key('/BitsPerComponent'):
            bits = parameters['/BitsPerComponent'].getRawValue()
        else:
            bits = None
        if parameters.has_key('/EarlyChange'):
            earlyChange = parameters['/EarlyChange'].getRawValue()
        else:
            earlyChange = None
        # NOTE(review): when predictor is absent or 1 this branch falls
        # through and the function returns None — callers presumably expect
        # (0, decodedStream) here; confirm against the caller.
        if predictor != None and predictor != 1:
            # PNG prediction:
            if predictor >= 10 and predictor <= 15:
                output = ''
                # PNG prediction can vary from row to row
                rowlength = columns + 1
                prev_rowdata = (0,) * rowlength
                for row in xrange(len(decodedStream) / rowlength):
                    rowdata = [ord(x) for x in decodedStream[(row*rowlength):((row+1)*rowlength)]]
                    filterByte = rowdata[0]
                    if filterByte == 0:
                        # PNG None filter: row stored verbatim
                        pass
                    elif filterByte == 1:
                        # PNG Sub filter: add the byte to the left
                        for i in range(2, rowlength):
                            rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
                    elif filterByte == 2:
                        # PNG Up filter: add the byte from the previous row
                        for i in range(1, rowlength):
                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                    else:
                        # unsupported PNG filter
                        #sys.exit("Unsupported PNG filter %r" % filterByte)
                        return (-1,'Unsupported parameters')
                    prev_rowdata = rowdata
                    output += (''.join([chr(x) for x in rowdata[1:]]))
                return (0,output)
            else:
                # unsupported predictor
                #sys.exit("Unsupported flatedecode predictor %r" % predictor)
                return (-1,'Unsupported parameters')
def test_compress_decompress_3():
    """A short word with a repeated syllable round-trips exactly."""
    original = "coucou"
    encoded, _, dictionary = compress(original)
    restored = decompress(encoded, dictionary)
    assert restored == original
def decode(self, data):
    """LZW-decode `data`, then undo the predictor declared in the stream
    parameters.

    Fixes: the predictor settings were read from an undefined name `p`;
    they come from ``self.getParams()`` (fetched once and reused).
    """
    params = self.getParams()
    # Only the default EarlyChange behaviour is supported.
    assert params['EarlyChange'] == 1
    data = lzw.decompress(data)
    data = Predictor(params['Predictor'], params['Columns'],
                     params['BitsPerComponent']).decode(data)
    return data
def test_compress_decompress_4():
    """A phrase containing a space round-trips exactly."""
    original = "pourquoi pas"
    encoded, _, dictionary = compress(original)
    restored = decompress(encoded, dictionary)
    assert restored == original
import lzw

# Round-trip "3_1.spc": write an LZW-compressed copy, then expand that copy
# again so the two files can be compared on disk.
source = lzw.readbytes("3_1.spc")
lzw.writebytes("3_1.spc.compressed", lzw.compress(source))
source.close()

packed = lzw.readbytes("3_1.spc.compressed")
lzw.writebytes("3_1.spc.decompressed", lzw.decompress(packed))
packed.close()
import lzw

# Compress ElQuijote.txt, save the result, and verify the round trip.
mybytes = lzw.readbytes("ElQuijote.txt")
# BUG FIX: lzw.compress() returns a one-shot generator; the original joined
# it for the file write and then fed the *exhausted* generator to
# lzw.decompress(), so the verification always compared against empty
# output. Materialize the compressed chunks once and reuse them.
lessbytes = list(lzw.compress(mybytes))

with open("Compressed.txt", 'wb') as outFile:
    outFile.write(b"".join(lessbytes))

newbytes = b"".join(lzw.decompress(lessbytes))
oldbytes = b"".join(lzw.readbytes("ElQuijote.txt"))
print(oldbytes == newbytes)
def test_compress_decompress_1():
    """A starred triplet pattern round-trips exactly."""
    original = "ab*cde*fgh*"
    encoded, _, dictionary = compress(original)
    restored = decompress(encoded, dictionary)
    assert restored == original
def decode(self, data):
    """LZW-decode `data`, then undo the predictor declared in the stream
    parameters.

    Fixes: the predictor settings were read from an undefined name `p`;
    they come from ``self.getParams()`` (fetched once and reused).
    """
    params = self.getParams()
    # Only the default EarlyChange behaviour is supported.
    assert params['EarlyChange'] == 1
    data = lzw.decompress(data)
    data = Predictor(params['Predictor'], params['Columns'],
                     params['BitsPerComponent']).decode(data)
    return data
def decode(self, data):
    """LZW-decode `data`, then undo the predictor declared in the stream
    parameters.

    Fixes two defects: the method was missing its `self` parameter even
    though it called ``self.getParams()`` (so any call raised NameError),
    and the predictor settings were read from an undefined name `p` instead
    of the getParams() result.
    """
    params = self.getParams()
    # Only the default EarlyChange behaviour is supported.
    assert params["EarlyChange"] == 1
    data = lzw.decompress(data)
    data = Predictor(params["Predictor"], params["Columns"],
                     params["BitsPerComponent"]).decode(data)
    return data
def lzwDecode(stream, parameters):
    # LZW-decode a PDF stream and optionally undo a PNG-style predictor
    # declared in the filter parameters dictionary.  Returns (0, data) on
    # success or (-1, message) on failure.  Python 2 code (has_key, xrange,
    # str-as-bytes).
    decodedStream = ''
    try:
        generator = lzw.decompress(stream)
        for c in generator:
            decodedStream += c
    except:
        return (-1, 'Error decompressing string')
    if parameters == None or parameters == {}:
        return (0, decodedStream)
    else:
        # Pull the optional filter parameters; each may be absent.
        if parameters.has_key('/Predictor'):
            predictor = parameters['/Predictor'].getRawValue()
        else:
            predictor = None
        if parameters.has_key('/Columns'):
            columns = parameters['/Columns'].getRawValue()
        else:
            columns = None
        if parameters.has_key('/Colors'):
            colors = parameters['/Colors'].getRawValue()
        else:
            colors = None
        if parameters.has_key('/BitsPerComponent'):
            bits = parameters['/BitsPerComponent'].getRawValue()
        else:
            bits = None
        if parameters.has_key('/EarlyChange'):
            earlyChange = parameters['/EarlyChange'].getRawValue()
        else:
            earlyChange = None
        # NOTE(review): when predictor is absent or 1 this branch falls
        # through and the function returns None — callers presumably expect
        # (0, decodedStream) here; confirm against the caller.
        if predictor != None and predictor != 1:
            # PNG prediction:
            if predictor >= 10 and predictor <= 15:
                output = ''
                # PNG prediction can vary from row to row
                rowlength = columns + 1
                prev_rowdata = (0, ) * rowlength
                for row in xrange(len(decodedStream) / rowlength):
                    rowdata = [
                        ord(x) for x in
                        decodedStream[(row * rowlength):((row + 1) * rowlength)]
                    ]
                    filterByte = rowdata[0]
                    if filterByte == 0:
                        # PNG None filter: row stored verbatim
                        pass
                    elif filterByte == 1:
                        # PNG Sub filter: add the byte to the left
                        for i in range(2, rowlength):
                            rowdata[i] = (rowdata[i] + rowdata[i - 1]) % 256
                    elif filterByte == 2:
                        # PNG Up filter: add the byte from the previous row
                        for i in range(1, rowlength):
                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                    else:
                        # unsupported PNG filter
                        #sys.exit("Unsupported PNG filter %r" % filterByte)
                        return (-1, 'Unsupported parameters')
                    prev_rowdata = rowdata
                    output += (''.join([chr(x) for x in rowdata[1:]]))
                return (0, output)
            else:
                # unsupported predictor
                #sys.exit("Unsupported flatedecode predictor %r" % predictor)
                return (-1, 'Unsupported parameters')
def test_compress_decompress_5():
    """A longer mixed-alphabet string with separators round-trips exactly."""
    original = "abcd*dccacbdda*aaddcba*"
    encoded, _, dictionary = compress(original)
    restored = decompress(encoded, dictionary)
    assert restored == original
import lzw

# Demo: compress a text file, show the codes, then restore them to a new file.
print("Compressing text...")
print()
codes = lzw.compress('darth_plagueis.txt')
print("Compressed text: ")
print(codes)
print()
print("Decompressing text...")
lzw.decompress(codes, 'darth_plagueis_out.txt')
def decompress(
    src=r'D:\workspace.python\data-compressor\files\wap_compressed.txt',
    dst=r'D:\workspace.python\data-compressor\files\wap_compressed_decompressed.txt',
):
    """Decompress the LZW file `src` into `dst`.

    The previously hard-coded absolute paths are now overridable defaults,
    so existing no-argument callers keep working while other files can be
    processed too.
    """
    lzw.decompress(src, dst)
def main():
    """CLI entry point: decompress the file named by argv[1] into argv[2]."""
    source, target = sys.argv[1], sys.argv[2]
    print("Decompressing from " + source + " to " + target)
    encoded = string_to_compressed_code(read_file(source))
    write_file(target, decompress(encoded))
def test_property(x):
    """Decompressing a compressed payload must reproduce it exactly."""
    round_tripped = lzw.decompress(lzw.compress(x))
    assert round_tripped == x