Example #1
0
def decompress(compressed, uncompressed):
    '''Decode a Huffman-compressed stream and write the decoded symbols.

    A Huffman tree is first read from the 'compressed' stream with
    read_tree. Symbols are then decoded one at a time with
    huffman.decode and written to the 'uncompressed' stream as 8-bit
    values.

    Decoding stops when huffman.decode returns None or raises EOFError.
    In either case one final byte with value 29 is written before
    returning.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.

    '''
    comp = BitReader(compressed)
    uncomp = BitWriter(uncompressed)

    tree = read_tree(comp)

    while True:
        # Only the decode call is guarded: running out of input is the
        # expected way for the loop to end, a failing write is not.
        try:
            symbol = huffman.decode(tree, comp)
        except EOFError:
            symbol = None

        # None (returned by decode, or substituted above) ends decoding.
        if symbol is None:
            break
        uncomp.writebits(symbol, 8)

    # NOTE(review): a trailing byte 29 is always appended to the output.
    # Kept as-is because its purpose is not visible here -- confirm it is
    # intended, since it is not part of the decoded data.
    uncomp.writebits(29, 8)
 def extract(self, output_file):
     """
     Restore the compressed data.

     Reads the trie from ``self.file`` with ``_read_trie``, then an 8-bit
     symbol count, then walks the trie once per symbol (one input bit per
     branch) and writes each leaf's ``char`` to ``output_file`` as 8 bits.

     NOTE(review): the count is read from an 8-bit field, so presumably at
     most 255 symbols can be restored -- confirm against the code that
     writes this format before widening it.
     """
     br = BitReader(self.file)
     root = self._read_trie(br)
     # number of symbols to decode
     length = int(br.readbits(8))
     bw = BitWriter(output_file)
     # decode using the Huffman trie; `range` replaces the Python-2-only
     # `xrange`, and the loop index itself is not needed
     for _ in range(length):
         node = root
         # a set bit selects the right child, a clear bit the left child
         while not node.is_leaf:
             node = node.right if br.readbit() else node.left
         # write the character to output
         bw.writebits(node.char, 8)
Example #3
0
 def compress(self, output_file):
     """
     Compress data from input file and write compressed data to the output file.

     The symbol table starts with one entry per single character below
     ``self.radix``. The input is then consumed greedily: the longest
     prefix already in the table is written as a ``self.codeword_width``-bit
     code, and that prefix extended by the next input character is added
     under the next free code while codes stay below
     ``self.codeword_limit``. The value ``self.radix`` is written last; it
     is never assigned to a prefix (new codes start at ``radix + 1``), so
     it presumably marks the end of the stream.
     """
     # create a ternary search trie and fill it with single ASCII characters
     # (`range` replaces the Python-2-only `xrange`)
     st = TernarySt()
     for i in range(self.radix):
         st[chr(i)] = i
     # self.radix itself is skipped: it is written after the last codeword
     code = self.radix + 1
     # read all the data from the input file (not optimal, but easy to code)
     data = self.file.read()
     bw = BitWriter(output_file)
     while data:
         lp = st.longest_prefix(data)
         matched = len(lp)
         # write the value of the prefix to output
         bw.writebits(st[lp], self.codeword_width)
         if matched < len(data) and code < self.codeword_limit:
             # add new prefix to the symbol table
             st[data[:matched + 1]] = code
             code += 1
         # drop the part of the input that has just been encoded
         data = data[matched:]
     bw.writebits(self.radix, self.codeword_width)
Example #4
0
 def compress(self, output_file):
     """
     Compress data from input file and write compressed data to the output file.

     The symbol table starts with one entry per single character below
     ``self.radix``. The input is then consumed greedily: the longest
     prefix already in the table is written as a ``self.codeword_width``-bit
     code, and that prefix extended by the next input character is added
     under the next free code while codes stay below
     ``self.codeword_limit``. The value ``self.radix`` is written last; it
     is never assigned to a prefix (new codes start at ``radix + 1``), so
     it presumably marks the end of the stream.
     """
     # create a ternary search trie and fill it with single ASCII characters
     # (`range` replaces the Python-2-only `xrange`)
     st = TernarySt()
     for i in range(self.radix):
         st[chr(i)] = i
     # self.radix itself is skipped: it is written after the last codeword
     code = self.radix + 1
     # read all the data from the input file (not optimal, but easy to code)
     data = self.file.read()
     bw = BitWriter(output_file)
     while data:
         lp = st.longest_prefix(data)
         matched = len(lp)
         # write the value of the prefix to output
         bw.writebits(st[lp], self.codeword_width)
         if matched < len(data) and code < self.codeword_limit:
             # add new prefix to the symbol table
             st[data[:matched + 1]] = code
             code += 1
         # drop the part of the input that has just been encoded
         data = data[matched:]
     bw.writebits(self.radix, self.codeword_width)
    def compress(self, output_file):
        """
        Compress data from input file and write compressed data to the output file.

        Works in two passes over ``self.file``: the first counts how often
        each character code occurs, the second re-reads the input from
        offset 0 and writes each character's code bit by bit. Between the
        passes the trie (via ``_write_trie``) and the number of input
        characters are written to ``output_file``.

        NOTE(review): the character count is written in an 8-bit field, so
        inputs longer than 255 characters presumably cannot be represented.
        Widening it requires changing the matching reader in lockstep.
        """
        # read input and count chars
        freq = defaultdict(int)
        length = 0
        while True:
            # read one char from a file
            char = self.file.read(1)
            if not char:
                # EOF
                break
            length += 1
            freq[ord(char)] += 1

        # build Huffman trie
        root = self._build_trie(freq)
        # build symbol table for chars and their binary representation
        st = dict()
        self._build_code(st, root, '')
        bw = BitWriter(output_file)
        # write the Huffman trie binary representation to the file
        self._write_trie(root, bw)
        # write number of bytes in original uncompressed message
        bw.writebits(length, 8)
        # use Huffman code to encode input: rewind once and read
        # sequentially instead of seeking before every single character
        self.file.seek(0)
        remaining = length
        while remaining:
            code = st[ord(self.file.read(1))]
            for c in code:
                # '0' is written as a clear bit, anything else as a set bit
                bw.writebit(c != '0')
            remaining -= 1