def verify_compressed_file(self, testfile=GIANT_FILE):
    """Round-trip ``testfile`` through LZW and check that every byte survives.

    The file is compressed into an anonymous temporary file, read back,
    decompressed, and compared position by position against a fresh read of
    the original.

    Args:
        testfile: path of the file to round-trip (defaults to GIANT_FILE).
    """
    with tempfile.TemporaryFile("w+b") as compressedfile:
        for chunk in lzw.compress(lzw.readbytes(testfile)):
            compressedfile.write(chunk)

        # Make sure everything is written, then rewind so the same handle
        # can be read back from the beginning.
        compressedfile.flush()
        compressedfile.seek(0)

        checkstream = lzw.readbytes(testfile)
        uncompressed = lzw.decompress(lzw.filebytes(compressedfile))

        # zip_longest pads the shorter stream with None, so a truncated or
        # over-long result shows up as a mismatch instead of being ignored.
        pairs = six.moves.zip_longest(checkstream, uncompressed)
        for position, (oldbyte, newbyte) in enumerate(pairs, 1):
            # Compare first so the message is only formatted on a failure.
            if oldbyte != newbyte:
                msg = "Corrupted byte at {0}, original {1} != {2}".format(
                    position, oldbyte, newbyte)
                # assertEqual: the assertEquals alias is deprecated and was
                # removed from unittest in Python 3.12.
                self.assertEqual(oldbyte, newbyte, msg)
def verify_compressed_file(self, testfile=GIANT_FILE):
    """Compress ``testfile``, decompress it again, and compare with the source.

    The compressed bytes are staged in a temporary file so the decompressor
    reads them back through ``lzw.filebytes`` rather than from memory.

    Args:
        testfile: path of the file to round-trip (defaults to GIANT_FILE).
    """
    with tempfile.TemporaryFile("w+b") as compressedfile:
        bigstream = lzw.readbytes(testfile)
        for bs in lzw.compress(bigstream):
            compressedfile.write(bs)

        # Flush pending writes and rewind before reading the data back.
        compressedfile.flush()
        compressedfile.seek(0)

        checkstream = lzw.readbytes(testfile)
        uncompressed = lzw.decompress(lzw.filebytes(compressedfile))

        # 1-based position of the byte being compared; zip_longest fills the
        # shorter side with None so a length difference is also reported.
        compared = enumerate(
            six.moves.zip_longest(checkstream, uncompressed), 1)
        for uncompressedsize, (oldbyte, newbyte) in compared:
            if oldbyte == newbyte:
                continue
            msg = "Corrupted byte at {0}, original {1} != {2}".format(
                uncompressedsize, oldbyte, newbyte)
            # assertEquals was a deprecated alias (removed in Python 3.12).
            self.assertEqual(oldbyte, newbyte, msg)
def calcular_distancia(buscados, comprimidos):
    """Walk every (searched file, compressed file) pair of two directories.

    For each entry of ``buscados`` the file is read and compressed; for each
    entry of ``comprimidos`` the file is read and decompressed.

    NOTE(review): in the visible code none of the intermediate values is
    used or returned, so the function currently yields ``None``; the actual
    distance computation is presumably missing or lives elsewhere - confirm.

    Args:
        buscados: directory holding the uncompressed files to look up.
        comprimidos: directory holding already-compressed files.
    """
    for searched_name in os.listdir(buscados):
        searched_path = os.path.abspath(buscados + "/" + searched_name)
        x_bytes = lzw.readbytes(searched_path)  # X
        x_compressed = lzw.compress(x_bytes)  # C(X)

        for compressed_name in os.listdir(comprimidos):
            compressed_path = os.path.abspath(
                comprimidos + "/" + compressed_name)
            cy_bytes = lzw.readbytes(compressed_path)  # C(Y)
            # The original comment labels this C(Y) as well; decompressing
            # C(Y) would presumably give Y - TODO confirm.
            y_bytes = lzw.decompress(cy_bytes)
def decompress(option, encoded_file, decoded_file):
    """Decode ``encoded_file`` into ``decoded_file`` with the chosen scheme.

    Args:
        option: selector, compared against strings:
            "1" - no compression, the input is copied unchanged;
            "2" - prefix-code table stored in the file header, read through
                  the module-level ``bitReader`` helper;
            "3" - LZW via the ``lzw`` package;
            "4" - arithmetic coding via ``arcode.ArithmeticCode``.
            Any other value does nothing.
        encoded_file: path of the file to decode.
        decoded_file: path the decoded bytes are written to.

    Side effects:
        Option "2" rebinds the globals ``byteArr`` and ``bitPosition``.
        Presumably ``bitReader`` consumes them - confirm against its
        definition.
    """
    global byteArr
    global bitPosition

    if option == "1":
        print("no compression")
        # Binary mode on both sides keeps the copy byte-exact; both handles
        # are closed even if the copy fails.
        with open(encoded_file, "rb") as src, open(decoded_file, "wb") as dst:
            dst.write(src.read())

    elif option == "2":
        # Load the whole encoded file; bitReader is expected to pull its
        # bits from this buffer.
        with open(encoded_file, "rb") as fi:
            byteArr = bytearray(fi.read())
        fileSize = len(byteArr)
        print("File size in bytes: " + str(fileSize))
        bitPosition = 0

        # Header: 8-bit (number of table entries - 1).
        n = int(bitReader(8), 2) + 1
        dic = {}
        for _ in range(n):
            # Each entry: 8-bit byte value, 3-bit (code length - 1), code.
            byteValue = int(bitReader(8), 2)
            m = int(bitReader(3), 2) + 1
            encodingBitStr = bitReader(m)
            dic[encodingBitStr] = byteValue

        # 32-bit (number of encoded bytes - 1).
        numBytes = int(bitReader(32), 2) + 1
        print("Number of bytes to decode: " + str(numBytes))

        with open(decoded_file, "wb") as fo:
            for _ in range(numBytes):
                # Extend the candidate code one bit at a time until it
                # matches a table entry.
                encodingBitStr = ""
                while True:
                    encodingBitStr += bitReader(1)
                    if encodingBitStr in dic:
                        # bytearray gives a one-byte payload on Python 2
                        # and 3 alike.
                        fo.write(bytearray((dic[encodingBitStr],)))
                        break

    elif option == "3":
        newbytes = b"".join(lzw.decompress(lzw.readbytes(encoded_file)))
        # Binary mode, closed deterministically (the handle used to leak).
        with open(decoded_file, "wb") as decoded:
            decoded.write(newbytes)
        # len() is the decoded byte count; __sizeof__() reported the
        # in-memory footprint of the object instead.
        print("LZW decoding num of bytes: " + str(len(newbytes)))

    elif option == "4":
        ar = arcode.ArithmeticCode(False)
        ar.decode_file(encoded_file, decoded_file)
def comprimir_archivos(origen, destino):
    """LZW-compress every entry of ``origen`` into ``destino``.

    Each output file keeps the source name with ``.compressed`` appended.

    Args:
        origen: directory whose entries are compressed.
        destino: directory that receives the ``*.compressed`` files.
    """
    for nombre in os.listdir(origen):
        ruta_origen = os.path.abspath(origen + "/" + nombre)
        comprimido = lzw.compress(lzw.readbytes(ruta_origen))
        ruta_destino = os.path.abspath(
            destino + "/" + nombre + ".compressed")
        lzw.writebytes(ruta_destino, comprimido)
def test_readbytes(self):
    """lzw.readbytes must yield exactly what a plain binary read returns."""
    with open(ENGLISH_FILE, "rb") as handle:
        expected = handle.read()

    actual = b"".join(lzw.readbytes(ENGLISH_FILE))

    # zip_longest pads the shorter side with None, so a length mismatch
    # fails the comparison as well.
    for want, got in six.moves.zip_longest(expected, actual):
        self.assertEqual(want, got)
def test_readbytes(self):
    """Compare lzw.readbytes output with a direct binary read of the file."""
    with open(ENGLISH_FILE, "rb") as source:
        reference = source.read()
    candidate = b"".join(lzw.readbytes(ENGLISH_FILE))

    # Pair the two sequences element by element; the shorter one is padded
    # with None, so differing lengths also trip the assertion.
    paired = six.moves.zip_longest(reference, candidate)
    for reference_item, candidate_item in paired:
        self.assertEqual(reference_item, candidate_item)
def extract_config(lzw_file, zyxel_file, r, header_size=60):
    """Strip the leading header from a response body and LZW-decompress it.

    Args:
        lzw_file: path that receives the body without its header.
        zyxel_file: path that receives the decompressed result.
        r: response-like object exposing ``content`` (bytes) and ``raw``.
        header_size: number of leading bytes to drop (60 by default, the
            value that used to be hard-coded).

    Decompression is best effort: a failure is reported on stdout and the
    function returns normally, leaving ``lzw_file`` in place.
    """
    # TODO: carried over from the original, which did not say what is left.
    print("[+] Stripping header")
    with open(lzw_file, 'wb') as f:
        r.raw.decode_content = True
        f.write(r.content[header_size:])

    try:
        mybytes = lzw.readbytes(lzw_file)
        lbytes = lzw.decompress(mybytes)
        lzw.writebytes(zyxel_file, lbytes)
    except Exception as exc:
        # Was a bare ``except: pass``: that also swallowed KeyboardInterrupt
        # and SystemExit and hid the reason for any failure.
        print("[-] LZW decompression failed: {0}".format(exc))
def lzw_algm(filename):
    """Compress ``filename`` to ``filename + '.lzw'`` and print two timings.

    "reading time" covers the ``lzw.readbytes`` call and "writing time"
    covers the ``lzw.writebytes`` call. An empty line is always printed at
    the end, even when an exception propagates.

    Args:
        filename: path of the file to compress.
    """
    began_at = time.time()  # recorded here but never read afterwards
    try:
        target = filename + '.lzw'

        read_started = timeit.default_timer()
        raw_stream = lzw.readbytes(filename)
        print("reading time", timeit.default_timer() - read_started)

        packed_stream = lzw.compress(raw_stream)

        write_started = timeit.default_timer()
        lzw.writebytes(target, packed_stream)
        print("writing time", timeit.default_timer() - write_started)
    finally:
        print("")
import lzw

# Files involved in the round trip.
SOURCE_PATH = "3_1.spc"
COMPRESSED_PATH = "3_1.spc.compressed"
DECOMPRESSED_PATH = "3_1.spc.decompressed"

# Step 1: compress the source file.
infile = lzw.readbytes(SOURCE_PATH)
compressed = lzw.compress(infile)
lzw.writebytes(COMPRESSED_PATH, compressed)
infile.close()  # release the reader once the output has been written

# Step 2: decompress the file produced by step 1.
infile = lzw.readbytes(COMPRESSED_PATH)
uncompressed = lzw.decompress(infile)
lzw.writebytes(DECOMPRESSED_PATH, uncompressed)
infile.close()
import lzw

# Compress the text. The chunks are materialised in a list because they are
# needed twice below (written to disk, then fed to the decompressor). If
# lzw.compress returns a one-shot iterator, as it appears to, the first use
# would otherwise drain it, leave the decompressor with no input and make
# the final comparison print False.
mybytes = lzw.readbytes("ElQuijote.txt")
lessbytes = list(lzw.compress(mybytes))

# Persist the compressed form; the with-block closes the file on error too.
with open("Compressed.txt", 'wb') as outFile:
    outFile.write(b"".join(lessbytes))

# Round trip: decompress the same chunks and compare with a fresh read.
newbytes = b"".join(lzw.decompress(lessbytes))
oldbytes = b"".join(lzw.readbytes("ElQuijote.txt"))
print(oldbytes == newbytes)
def _fixed_width_bits(value, width):
    """Return ``value`` in binary, left-padded with '0' to ``width`` digits."""
    bits = bin(value)[2:]  # drop the '0b' prefix
    return '0' * (width - len(bits)) + bits


def compressToOutputFile(input_file, output_file_name, option):
    """Encode ``input_file`` into ``output_file_name`` with the chosen scheme.

    Args:
        input_file: path of the file to encode.
        output_file_name: path of the encoded output.
        option: selector, compared against integers:
            1 - no compression, the input is copied unchanged;
            2 - per-byte prefix codes built by ``shannon_fano_encoder``;
            3 - LZW via the ``lzw`` package;
            4 - arithmetic coding via ``arcode.ArithmeticCode``.
            Any other value only prints the two banner lines.

    Side effects:
        Option 2 rebinds the globals ``tupleList`` (deleted again once the
        code table is built) and ``bitStream``. ``shannon_fano_encoder`` and
        ``byteWriter`` are defined elsewhere and presumably work through
        those globals - confirm against their definitions.
    """
    global tupleList
    global bitStream

    print("fileName: " + output_file_name)
    print("option: " + str(option))

    if option == 1:
        # Byte-exact copy; both handles are closed even on failure (the
        # read handle used to be left open).
        with open(input_file, 'rb') as src, \
                open(output_file_name, 'wb') as dst:
            dst.write(src.read())
        print("Size of input file in bytes: ")
        print_file_size(input_file)
        print("Size of output file (" + output_file_name + ") in bytes: ")
        print_file_size(output_file_name)

    elif option == 2:
        # Read the whole input file into a byte array.
        with open(input_file, 'rb') as fi:
            byteArr = bytearray(fi.read())
        fileSize = len(byteArr)
        print("Size of input file in bytes:  " + str(fileSize))

        # Count how often each byte value occurs.
        freqList = [0] * 256
        for b in byteArr:
            freqList[b] += 1

        # (frequency, byteValue, encodingBitStr) for every byte value that
        # occurs, most frequent first.
        tupleList = [(freqList[b], b, '')
                     for b in range(256) if freqList[b] > 0]
        tupleList = sorted(tupleList, key=lambda tup: tup[0], reverse=True)

        shannon_fano_encoder(0, len(tupleList) - 1)

        # byteValue -> encodingBitStr lookup used while encoding.
        dic = dict((tup[1], tup[2]) for tup in tupleList)
        del tupleList  # not needed once the lookup table exists

        # Header layout: 8-bit (entry count - 1), then per entry
        # 8-bit byteValue, 3-bit (len(encodingBitStr) - 1), encodingBitStr.
        # NOTE(review): the 3-bit field only represents code lengths 1..8;
        # a longer code makes the field itself wider than 3 bits.
        bitStream = ''
        with open(output_file_name, 'wb') as fo:
            # One raw byte; bytearray works on Python 2 and 3 alike.
            fo.write(bytearray((len(dic) - 1,)))
            for byteValue, encodingBitStr in dic.items():
                byteWriter(_fixed_width_bits(byteValue, 8), fo)
                byteWriter(_fixed_width_bits(len(encodingBitStr) - 1, 3), fo)
                byteWriter(encodingBitStr, fo)

            # 32-bit (input file size - 1).
            byteWriter(_fixed_width_bits(fileSize - 1, 32), fo)

            # The encoded data itself.
            for b in byteArr:
                byteWriter(dic[b], fo)
            byteWriter('0' * 8, fo)  # to write the last remaining bits (if any)

        print("Size of compressed putput file (" + output_file_name + ") in bytes: ")
        print_file_size(output_file_name)

    elif option == 3:
        print("Size of input file in bytes: ")
        print_file_size(input_file)
        mybytes = lzw.readbytes(input_file)
        lessbytes = lzw.compress(mybytes)
        lzw.writebytes(output_file_name, lessbytes)
        print("Size of compressed putput file (" + output_file_name + ") in bytes: ")
        print_file_size(output_file_name)

    elif option == 4:
        print("Size of input file in bytes: ")
        print_file_size(input_file)
        ar = arcode.ArithmeticCode(False)
        ar.encode_file(input_file, output_file_name)
        print("Size of compressed putput file (" + output_file_name + ") in bytes: ")
        print_file_size(output_file_name)
import lzw
import os
import plotly.plotly as py
import plotly.graph_objs as go


def _size_ratio(original_path, packed_path):
    """Return size(original_path) / size(packed_path) as a float."""
    return os.stat(original_path).st_size / float(os.stat(packed_path).st_size)


# --- LZW ---------------------------------------------------------------
print("LZW")
inputf = lzw.readbytes('uncompressed.txt')
compressed = lzw.compress(inputf)
lzw.writebytes('compressed.lzw', compressed)
ratio = _size_ratio('uncompressed.txt', 'compressed.lzw')
print("Compression Ratio :  " + str(ratio))

# --- Adaptive arithmetic coding (external script) -----------------------
os.system(
    "python ./arithmetic/adaptive-arithmetic-compress.py uncompressed.txt compressed.bin"
)
ratio2 = _size_ratio('uncompressed.txt', 'compressed.bin')
print("Adaptive Arithmetic coding")
print("Compression Ratio :  " + str(ratio2))

# --- Pie chart of the two ratios -----------------------------------------
labels = ['Lempel-Ziv', 'arithmetic']
values = [ratio, ratio2]
trace = go.Pie(labels=labels, values=values)
py.iplot([trace], filename='abedmurrar15')