Example #1
def decompress(input):
    with open(input, 'rb') as file:
        data = file.read()

    data = huffman.decode(data)

    shape, data = struct.unpack('iii', data[:12]), data[12:]
    (Y_length, Cb_length,
     Cr_length), data = struct.unpack('iii', data[:12]), data[12:]

    fmt = '%us%us%us' % (Y_length, Cb_length, Cr_length)
    (Y_data, Cb_data, Cr_data) = struct.unpack(fmt, data)

    # Channel decompression

    Y = decompress_channel(Y_data)
    Cb = decompress_channel(Cb_data)
    Cr = decompress_channel(Cr_data)

    # Chroma resampling

    Cb = np.repeat(Cb, CS, 0)
    Cb = np.repeat(Cb, CS, 1)
    Cr = np.repeat(Cr, CS, 0)
    Cr = np.repeat(Cr, CS, 1)

    decoded_image = np.empty((shape[0], shape[1], 3), dtype='uint8')
    decoded_image[:, :, 0] = Y[:shape[0], :shape[1]]
    decoded_image[:, :, 1] = Cb[:shape[0], :shape[1]]
    decoded_image[:, :, 2] = Cr[:shape[0], :shape[1]]

    decoded_image = convert(decoded_image, 'YCbCr', 'RGB')

    return decoded_image
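
For context: the struct header that decompress() unpacks above has to be written by a matching compress step. A minimal sketch of that packing order, read directly off the unpack calls above (the helper name pack_payload and its callers are assumptions; only the byte layout comes from the code):

import struct

# Sketch of the header layout that decompress() above expects, prior to the
# Huffman pass: three ints for the image shape, three ints for the channel
# byte lengths, then the three compressed channel payloads back to back.
def pack_payload(shape, Y_data, Cb_data, Cr_data):
    header = struct.pack('iii', shape[0], shape[1], shape[2])
    header += struct.pack('iii', len(Y_data), len(Cb_data), len(Cr_data))
    return header + Y_data + Cb_data + Cr_data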
Example #2
 def test_decoding_real_world(self):
     frequency_map = get_real_world_input()
     tree = build_tree(frequency_map)
     text = get_real_world_text()
     encoded = encode(text, tree)
     decoded = decode(encoded, tree)
     self.assertEqual(text, decoded)
Example #3
def decompress(compressed, uncompressed):
    '''First, read a Huffman tree from the 'compressed' stream using your
    read_tree function. Then use that tree to decode the rest of the
    stream and write the resulting symbols to the 'uncompressed'
    stream.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.

    '''
    comp = BitReader(compressed)
    uncomp = BitWriter(uncompressed)

    tree = read_tree(comp)

    while True:
        try:
            uncomp_byte = huffman.decode(tree, comp)
            if uncomp_byte is None:
                raise EOFError
            uncomp.writebits(uncomp_byte, 8)

        except EOFError:
            uncomp.writebits(29, 8)
            break
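
Several of the examples below (#5, #9, #10, #12) rely on the same read_tree helper, which is never shown. Purely as an illustration, and not necessarily the serialization those assignments actually use, a pre-order bit format could be read back as follows, assuming the BitReader exposes readbit() and readbits(n); Leaf and Branch are hypothetical node classes:

# Illustrative sketch only: assumes each internal node was written as bit 0
# followed by its two subtrees, and each leaf as bit 1 followed by its 8-bit
# symbol. Leaf and Branch stand in for whatever node classes the real
# huffman module defines.
class Leaf:
    def __init__(self, symbol):
        self.symbol = symbol

class Branch:
    def __init__(self, left, right):
        self.left, self.right = left, right

def read_tree(bitreader):
    if bitreader.readbit():          # 1 -> leaf: the next 8 bits are the symbol
        return Leaf(bitreader.readbits(8))
    left = read_tree(bitreader)      # 0 -> internal node: read both children
    right = read_tree(bitreader)
    return Branch(left, right)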
Example #4
 def test_decoding(self):
     frequency_map = get_wikipedia_input()
     tree = build_tree(frequency_map)
     samples = ['a', 'abc', 'adeaddadcededabadbabeabeadedabacabed']
     for text in samples:
         encoded = encode(text, tree)
         decoded = decode(encoded, tree)
         self.assertEqual(text, decoded)
Example #5
def decompress(compressed, uncompressed):
    bitreader = bitio.BitReader(compressed)
    bitwriter = bitio.BitWriter(uncompressed)
    tree = read_tree(bitreader)
    # Repeatedly read coded bits from the file, decode them using tree
    while True:
        decoded = huffman.decode(tree, bitreader)
        # As soon as you decode the end-of-message symbol, you should stop reading.
        if decoded is None:
            break
        # write the decoded byte to the uncompressed output
        bitwriter.writebits(decoded, 8)
Example #6
def decode(filename_in, filename_out):
    with open(filename_in, 'rb') as fi:
        n_header = int.from_bytes(fi.read(8), 'big')
        u = int.from_bytes(fi.read(1), 'big')
        header = fi.read(n_header)
        data = fi.read()
        freq = pickle.loads(header)
        tree = huffman.build_tree(freq)
        map_code = huffman.build_map_code(tree)
        out = huffman.decode(data, map_code, u)
        with open(filename_out, 'wb') as fo:
            fo.write(out)
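
Example #6 implies a specific file layout. Below is a sketch of the matching writer, reading that layout straight off the decoder; treating the single byte u as the count of unused padding bits in the last byte is an assumption, as is the helper name write_encoded:

import pickle

# Writer side of the layout decode() above reads: an 8-byte big-endian header
# length, one byte 'u', the pickled frequency table, then the encoded payload.
# 'freq', 'encoded' and 'u' come from whatever Huffman encoder is in use.
def write_encoded(filename_out, freq, encoded, u):
    header = pickle.dumps(freq)
    with open(filename_out, 'wb') as fo:
        fo.write(len(header).to_bytes(8, 'big'))
        fo.write(u.to_bytes(1, 'big'))
        fo.write(header)
        fo.write(encoded)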
Example #7
def test_decode_reverses_encode_long():
    string = """In computer science and information theory, Huffman coding is
    an entropy encoding algorithm used for lossless data compression. The term
    refers to the use of a variable-length code table for encoding a source
    symbol (such as a character in a file) where the variable-length code table
    has been derived in a particular way based on the estimated probability of
    occurrence for each possible value of the source symbol. It was developed
    by David A. Huffman while he was a Ph.D. student at MIT, and published in
    the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes.
    """
    tree = encode(TEST_FILE, string)
    decoded = decode(TEST_FILE, tree)
    assert decoded.startswith(string)
Example #8
 def test_encode_text(self):
     print('test_encode_text')
     huffman.DEBUG = True
     print('huffman.DEBUG=' + str(huffman.DEBUG))
     path = self.filepath
     with open(path, 'r+') as f:
         eof = False
         while not eof:
             line = f.readline()
             if len(line) == 0:
                 eof = True
                 break
             coded = huffman.encode(line)
             decoded = huffman.decode(coded[0], coded[1])
             self.assertEqual(line, decoded)
Example #9
def decompress(compressed, uncompressed):
    '''First, read a Huffman tree from the 'compressed' stream using your
    read_tree function. Then use that tree to decode the rest of the
    stream and write the resulting symbols to the 'uncompressed'
    stream.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.

    '''
    bitstream = bitio.BitReader(compressed)  # Gets bits from compressed
    tree = read_tree(bitstream)  # Produce tree based on bit sequence
    while True:  # Do final decoding of tree based on remaining bits
        val = huffman.decode(tree, bitstream)
        if val is None:  # Stop at the end-of-message leaf
            break
        else:  # Write the stored values in the tree (ordered by bit sequence)
            uncompressed.write(bytes([val]))  # as a byte in uncompressed
Example #10
def decompress(compressed, uncompressed):
    '''First, read a Huffman tree from the 'compressed' stream using your
    read_tree function. Then use that tree to decode the rest of the
    stream and write the resulting symbols to the 'uncompressed'
    stream.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.

    '''
    bitreader = bitio.BitReader(compressed)
    tree = read_tree(bitreader)
    message = list()  # the list of ascii codes to be added to
    while True:
        char = huffman.decode(tree, bitreader)  # decode the character
        if char is None:  # end-of-message symbol reached: stop reading
            break
        message.append(char)  # otherwise add to the list
    # writes the message to the writable file stream

    uncompressed.write(bytes(message))
Example #11
def main():
    txt = 1000 * open('README').read()

    t0 = time()
    freq = Counter(txt)
    print('count:     %9.6f sec' % (time() - t0))

    t0 = time()
    tree = huffTree(freq)
    print('tree:      %9.6f sec' % (time() - t0))

    write_dot(tree, 'tree.dot')
    code = huffCode(tree)
    # create tree from code (no frequencies)
    write_dot(make_tree(code), 'tree_raw.dot')

    a = bitarray()

    t0 = time()
    a.encode(code, txt)
    print('C encode:  %9.6f sec' % (time() - t0))

    # Time the decode function above
    t0 = time()
    res = decode(tree, a)
    Py_time = time() - t0
    assert ''.join(res) == txt
    print('Py decode: %9.6f sec' % Py_time)

    # Time the decode method which is implemented in C
    t0 = time()
    res = a.decode(code)
    C_time = time() - t0   # measure before the verification join, as in the Python timing
    assert ''.join(res) == txt
    print('C decode:  %9.6f sec' % C_time)

    print('Ratio: %f' % (Py_time / C_time))
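
The decode function timed above as "Py decode" is defined elsewhere in that script; roughly, a pure-Python tree walk of the following shape is what the benchmark compares against bitarray's C decoder. The .child/.symbol attribute names are assumptions, not the script's actual node layout:

# Illustrative pure-Python decoder: walk the Huffman tree bit by bit and emit
# a symbol at every leaf. Assumes branch nodes carry a two-element .child
# tuple and leaf nodes carry .symbol.
def decode(tree, bits):
    result = []
    node = tree
    for bit in bits:
        node = node.child[1] if bit else node.child[0]
        if not hasattr(node, 'child'):   # reached a leaf
            result.append(node.symbol)
            node = tree
    return result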
Example #12
def decompress(compressed, uncompressed):
    '''First, read a Huffman tree from the 'compressed' stream using your
    read_tree function. Then use that tree to decode the rest of the
    stream and write the resulting symbols to the 'uncompressed'
    stream.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.
    '''
    #reader reads from the encoded compressed file
    reader = bitio.BitReader(compressed)
    #writer writes to a decoded uncompressed file
    writer = bitio.BitWriter(uncompressed)
    #the huffman tree read containing the decoding info
    tree = read_tree(reader)
    while True:
        current_element = huffman.decode(tree, reader)
        # stop once the end-of-message leaf (TreeLeafEndMessage) is reached
        if current_element is None:
            break
        else:
            writer.writebits(current_element, 8)
Example #13
textCodes = getCode(textTree)
print("\nHuffman code for text data:")
for (key, value) in textCodes.items():
    print(key, '\t', value)

# let's encode the tale
textBinary = encode(textData, textCodes)
print("\nEncoded text data:")
print("%s -------> %s" % (textData, textBinary))
print("Average length (bits per character): ", len(textBinary) / len(textData))
# TODO: to compare average length to entropy, must implement getEntropy()
print("PART A - Entropy:", getEntropy(textFreqs))
print("PART B - The ceiling of entropy and average code length are equal")
# TODO: to decode messages, must implement decode() (a sketch follows this example)
messageEncoded = '0110000101010010111100011001111110100101100101001011110'
messageDecoded = decode(messageEncoded, textTree)
print('\nPART C, D - Decoded message:', "".join(messageDecoded))

print("\n\n-----Web session lengths.------")

# construct the frequency dictionary
sessionLengths = np.load("sessionLengths.npy")
webFreqs = {}
for i in range(len(sessionLengths)):
    webFreqs[i + 1] = sessionLengths[i]
print('PART E - Entropy:', getEntropy(webFreqs))
# compute and plot session length probabilities
webProbs = sessionLengths / np.sum(sessionLengths)

plt.semilogy(range(1, 101), webProbs)
plt.xlabel('Web Session Length')
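
The TODO in Example #13 calls for a decode() implementation. One possible shape for it, assuming (hypothetically) that branch nodes of textTree expose .left/.right and that leaves carry the character in .symbol with empty children:

# Possible decode() for Example #13: walk textTree one '0'/'1' character at a
# time and emit a character whenever a leaf is reached. The .left/.right/
# .symbol attribute names are assumptions about how textTree is built.
def decode(bits, tree):
    symbols = []
    node = tree
    for bit in bits:
        node = node.right if bit == '1' else node.left
        if node.left is None and node.right is None:   # leaf
            symbols.append(node.symbol)
            node = tree
    return symbols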
Example #14
def test_decode_reverses_encode_special():
    string = '! %'
    tree = encode(TEST_FILE, string)
    decoded = decode(TEST_FILE, tree)
    assert decoded.startswith(string)
Example #15
def test_decode_reverses_encode_simple():
    string = 'abbb'
    tree = encode(TEST_FILE, string)
    decoded = decode(TEST_FILE, tree)
    assert decoded.startswith(string)
Example #16

decoded_message_uint8 = np.array(
    [ord(c) for c in decoded_message.getvalue()], dtype=np.uint8)

# Remove the superfluous padding data
te_vertwijderen_nullen = len(decoded_message_uint8) - initiele_lengte
print("Number of bits that must be removed:", te_vertwijderen_nullen)

decoded_message_uint8 = decoded_message_uint8[:-te_vertwijderen_nullen or None]
#print("decoded_message_uint8: ", decoded_message_uint8)
#print("Its length is:", (8*len(decoded_message_uint8)), "bits")
print("The difference before and after channel decoding and bit removal is:", len(decoded_message_uint8) - initiele_lengte)


# ======================= SOURCE DECODING ========================
# =========================== Huffman ============================
print("-------START HUFFMAN DECODING-------")

print("lengte chan decoded data", (8*len(decoded_message_uint8)),"bits")
klaar_voor_src_dec = util.uint8_to_bit(decoded_message_uint8)
huf_decoded_message = huffman.decode(huffman_tree, klaar_voor_src_dec)
print(F"Dec: {t.toc_str()}")
print("Huffman decoded lengte:", (8*len(huf_decoded_message)), "bits, = lengte originele data")

# ======================= Source recreating ========================
print("-------START SOURCE RECREATING-------")
verhouding = np.reshape(huf_decoded_message, (image.height, image.width, image.num_of_channels))

afbeelding = Image.fromarray(verhouding, image.mode)
afbeelding.show()
Example #17
    def ReadDataBlock(self, codingParams):
        """
        Reads a block of coded data from a PACFile object that has already
        executed OpenForReading() and returns those samples as reconstituted
        signed-fraction data
        """
        # loop over channels (whose coded data are stored separately) and read in each data block
        data = []
        for iCh in range(codingParams.nChannels):
            data.append(np.array(
                [], dtype=np.float64))  # add location for this channel's data
            # read in string containing the number of bytes of data for this channel (but check if at end of file!)
            s = self.fp.read(calcsize("<L"))  # will be empty if at end of file
            if not s:
                # hit last block, see if final overlap and add needs returning, else return nothing
                if codingParams.overlapAndAdd:
                    overlapAndAdd = codingParams.overlapAndAdd
                    codingParams.overlapAndAdd = 0  # setting it to zero so next pass will just return
                    return overlapAndAdd
                else:
                    return
            # not at end of file, get nBytes from the string we just read
            nBytes = unpack("<L",
                            s)[0]  # read it as a little-endian unsigned long
            # read the nBytes of data into a PackedBits object to unpack
            pb = PackedBits()
            pb.SetPackedData(
                self.fp.read(nBytes)
            )  # PackedBits function SetPackedData() converts strings to internally-held array of bytes
            if pb.nBytes < nBytes:
                raise "Only read a partial block of coded PACFile data"

            # extract the data from the PackedBits object
            codingParams.state = pb.ReadBits(2)  # read in blockType
            overallScaleFactor = pb.ReadBits(
                codingParams.nScaleBits)  # overall scale factor
            hTable = pb.ReadBits(
                codingParams.nHuffTableBits)  # huffman table code
            scaleFactor = []
            bitAlloc = []

            if codingParams.state == 0:
                mantissa = np.zeros(codingParams.nMDCTLinesLong,
                                    np.int32)  # start w/ all mantissas zero
            elif codingParams.state == 1 or codingParams.state == 3:
                mantissa = np.zeros(codingParams.nMDCTLinesTrans,
                                    np.int32)  # start w/ all mantissas zero
            else:
                mantissa = np.zeros(codingParams.nMDCTLinesShort,
                                    np.int32)  # start w/ all mantissas zero

            for iBand in range(
                    codingParams.sfBandsLong.nBands
            ):  # loop over each scale factor band to pack its data
                ba = pb.ReadBits(codingParams.nMantSizeBits)
                if ba:
                    ba += 1  # no bit allocation of 1 so ba of 2 and up stored as one less
                bitAlloc.append(ba)  # bit allocation for this band
                scaleFactor.append(pb.ReadBits(
                    codingParams.nScaleBits))  # scale factor for this band
                if bitAlloc[iBand]:
                    if codingParams.state == 0:
                        nMDCTLines = codingParams.nMDCTLinesLong
                        nLines = codingParams.sfBandsLong.nLines[iBand]
                        lowerLine = codingParams.sfBandsLong.lowerLine[iBand]
                        upperLine = codingParams.sfBandsLong.upperLine[iBand]
                    elif codingParams.state == 1 or codingParams.state == 3:
                        nMDCTLines = codingParams.nMDCTLinesTrans
                        nLines = codingParams.sfBandsTrans.nLines[iBand]
                        lowerLine = codingParams.sfBandsTrans.lowerLine[iBand]
                        upperLine = codingParams.sfBandsTrans.upperLine[iBand]
                    else:
                        nMDCTLines = codingParams.nMDCTLinesShort
                        nLines = codingParams.sfBandsShort.nLines[iBand]
                        lowerLine = codingParams.sfBandsShort.lowerLine[iBand]
                        upperLine = codingParams.sfBandsShort.upperLine[iBand]

                    # read non huffman encoded mantissas
                    if hTable == 0:
                        m = np.empty(nLines, np.int32)
                        for j in range(nLines):
                            m[j] = pb.ReadBits(
                                bitAlloc[iBand]
                            )  # mantissas for this band (if bit allocation non-zero) and bit alloc <>1 so encoded as 1 lower than actual allocation
                        mantissa[lowerLine:upperLine + 1] = m
                    # read huffman mantissas
                    else:
                        nHuffBits = pb.ReadBits(codingParams.nHuffLengthBits)
                        nChunks = int(np.ceil(nHuffBits / 16.))
                        huffBits = np.empty(nChunks +
                                            1).astype(dtype=np.uint16)
                        huffBits[0] = nHuffBits
                        for i in range(nChunks):
                            bits = pb.ReadBits(np.min([16, nHuffBits]))
                            if (nHuffBits < 16):
                                bits = bits << (16 - nHuffBits)
                            huffBits[i + 1] = bits
                            nHuffBits = nHuffBits - 16
                        if huffBits.any():
                            decoded = decode(
                                huffBits,
                                codingParams.encodingTrees[hTable - 1])
                            mantissa[lowerLine:upperLine + 1] = decoded

            # done unpacking data (end loop over scale factor bands)

            # (DECODE HERE) decode the unpacked data for this channel, overlap-and-add first half, and append it to the data array (saving other half for next overlap-and-add)
            decodedData = self.Decode(scaleFactor, bitAlloc, mantissa,
                                      overallScaleFactor, codingParams)
            data[iCh] = np.concatenate(
                (data[iCh],
                 np.add(codingParams.overlapAndAdd[iCh],
                        decodedData[:codingParams.a])
                 ))  # data[iCh] is overlap-and-added data
            codingParams.overlapAndAdd[iCh] = decodedData[
                codingParams.a:]  # save other half for next pass

        # end loop over channels, return signed-fraction samples for this block
        return data
Example #18
import huffman


huffman.encode("input.txt", "test.huff")
huffman.decode("test.huff", "output.txt")
Example #19
def decode(file, tree):
    f = open('huff_' + file, 'r')
    text = f.read()
    f.close()
    print(lz.decode(huff.decode(tree, text)))
Example #20
 def test_decode(self):
     decode("story.huff", "story_.txt")
     assert True
Example #21
huffRes = huffman.encode(filechars)

# run length encoding
import RLE
rleRes = RLE.encode(huffRes)

# sizes
hsize = len(huffRes.tobytes())
rsize = int(len(rleRes) * 1.5 / 8)
print "original size = ", reduce(
    lambda x, y: x * y, image.shape
), " bytes, huffman size = ", hsize, "bytes, compressed size = ", rsize, " bytes"

# decoding
decoded = RLE.decode(rleRes)
decoded = eval("[" + huffman.decode(decoded)[:len(filechars)] + "]")

pointer = 0
final = np.zeros(image.shape, np.uint8)
for idx, channel in enumerate(copy):
    rows, cols = channel.shape

    # we undo the zigzag traversal
    for row in range(0, rows, 8):
        for col in range(0, cols, 8):

            bloc = np.zeros((64), np.float32)
            bloc[Zigzag] = decoded[pointer:pointer + 64]
            pointer += 64

            # apply the inverse DCT
Example #22
    if len(sys.argv) < 3:

        print("Too few arguments given!")
        print("Please format your command like this:")
        print("    python encode_file.py <Path to input> <Path to output>")
        print()
        print("In the output path, please do not specify a file extension.")
        print("This program will create two input files:")
        print("    <INPUT>.data")
        print("    <INPUT>.keys")
        print("Both of these files are necessary to decompress our data.")
        exit()

    input_path = sys.argv[1]
    output_path = sys.argv[2]
    bin_string = ""
    data = dict()

    print("Reading {0}.data & {0}.keys...".format(input_path))

    try:
        bin_string, data = read(input_path)
    except FileNotFoundError:
        print("ERROR: File {0} not found!".format(input_path))
        exit()

    print("Decoding the file...")
    decoded = decode(bin_string, data)

    print("Writing out to {0}...".format(output_path))
    write(output_path, decoded)
Example #23
 def test_decode(self):
     decode("", "")
     assert True
Example #24
string = h.remove_spl_ch(string)
message = h.remove_spl_ch(message)

# to create the huffman map
prob_of_characters, enc_dict = h.encode_dict(input=string)

print("\nencoded dictionary  : ", end="\n\n")

for key, value in enc_dict:
    print(key, " : ", value)

print("\n\n")
print("probability of characters : ", end="\n\n")
for key, value in prob_of_characters:
    print(key, " : ", value)
print("\n\n")

# to encode the message(custom) using the huffman map
enc_msg = h.encode(msg=message, dictionary=enc_dict)
print("encoded message : ", enc_msg, end="\n\n")

# to encode the original string using huffman map
enc_string = h.encode(msg=string, dictionary=enc_dict)

# to decode the encoded message using huffman map
dec_msg = h.decode(enc_msg=enc_msg, dictionary=enc_dict)
print("decoded message : ", dec_msg, end="\n\n")

# to get information about the space saved
h.size_saved(dictionary=enc_dict, msg=string, enc_msg=enc_string)
Example #25
def test_decode_reverses_encode_unicode():
    string = 'Kærlighed og Øl!'
    tree = encode(TEST_FILE, string)
    decoded = decode(TEST_FILE, tree)
    assert decoded.startswith(string)
Example #26
def decode(file, tree):
    f = open('huff_' + file, 'r')
    text = f.read()
    f.close()
    print(lz.decode(huff.decode(tree, text)))
Example #27
import pickle
import argparse
from huffman import decode

parser = argparse.ArgumentParser()
parser.add_argument('input', help='path to input binary file.')
parser.add_argument('output', help='path to output text file.')
args = parser.parse_args()

with open(args.input, 'rb') as in_, open(args.output, 'wb') as out:
    out.write(decode(*pickle.load(in_)))
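
The companion encoder for this script is not shown; whatever it does, it must pickle exactly the argument tuple that decode() expects. A hypothetical counterpart under that assumption (encode and build_tree are placeholder names, not a confirmed API of that huffman module):

# Hypothetical counterpart: pickle the tuple of arguments that huffman.decode()
# takes, so the decoder can simply unpack it with decode(*pickle.load(in_)).
import pickle
from huffman import encode, build_tree  # placeholder helper names

with open('input.txt', 'rb') as in_, open('encoded.bin', 'wb') as out:
    raw = in_.read()
    tree = build_tree(raw)         # assumed helper
    encoded = encode(raw, tree)    # assumed helper
    pickle.dump((encoded, tree), out)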
Example #28
from pprint import pprint

import huffman
from view import viz_tree

data = b"huffman"

tree = huffman.build_tree(data)
map_code = huffman.build_map_code(tree)

# encode
bin_data = huffman.encode(data, map_code)

print("Map code")
for k, v in map_code.items():
    print("{}: {}".format(chr(k), v.to01()))
print("Encoded data")
print(bin_data.to01())
viz_tree(tree)

# decode
print("After decode")
print(huffman.decode(bin_data.tobytes(), map_code, bin_data.buffer_info()[3]))

# calculate performance
p = len(bin_data) / (len(data) * 8)
print(f"Reduce {p * 100}%")
Example #29
 def test_decode(self):
     decode("test_file.huff", "test_file_.txt")
     assert True
Example #30
t = Time()

t.tic()
# TODO Determine the number of occurrences of the source or use a fixed huffman_freq (see the sketch after this example)
huffman_freq = "TODO"
huffman_tree = huffman.Tree(huffman_freq)
print(F"Generating the Huffman Tree took {t.toc_str()}")

t.tic()
# TODO print-out the codebook and validate the codebook (include your findings in the report)
encoded_message = huffman.encode(huffman_tree.codebook, image.get_pixel_seq())
print(len(encoded_message))
print("Enc: {}".format(t.toc()))

t.tic()
decoded_message = huffman.decode(huffman_tree, encoded_message)
print("Dec: {}".format(t.toc()))

input_lzw = img.get_pixel_seq().copy()

# ======================= SOURCE ENCODING ========================
# ====================== Lempel-Ziv-Welch ========================

t.tic()
encoded_msg, dictionary = lzw.encode(input_lzw)
print("Enc: {}".format(t.toc()))

t.tic()
decoded_msg = lzw.decode(encoded_msg)
print("Enc: {0:.4f}".format(t.toc()))