def compress(self, output_file):
    """
    Compress data from input file and write compressed data to the output file.

    Works in two passes over ``self.file``:

    1. Read the input one character at a time, counting how often each
       character code occurs and how many characters were read.
    2. Build the Huffman trie and the code table from those counts, then
       write, in this order: the serialized trie, the input length, and
       the Huffman code of every input character.

    Args:
        output_file: Handed to ``BitWriter``; receives the compressed
            output.
    """
    # First pass: read input and count chars.
    freq = defaultdict(int)
    length = 0
    while True:
        char = self.file.read(1)
        if not char:
            # EOF
            break
        length += 1
        freq[ord(char)] += 1

    # Build Huffman trie from the frequency table.
    root = self._build_trie(freq)

    # Build symbol table mapping each char code to its '0'/'1' code string.
    st = {}
    self._build_code(st, root, '')

    bw = BitWriter(output_file)

    # Write the Huffman trie binary representation to the file.
    self._write_trie(root, bw)

    # Write number of bytes in original uncompressed message.
    # NOTE(review): if the second argument is a bit width, inputs longer
    # than 255 characters cannot be represented here -- confirm against
    # BitWriter.writebits and the matching reader before changing it.
    bw.writebits(length, 8)

    # Second pass: rewind once and stream the input sequentially. Seeking
    # before every single read costs one seek per character and relies on
    # integer offsets that text-mode streams do not guarantee to accept.
    self.file.seek(0)
    remaining = length
    while remaining:
        code = st[ord(self.file.read(1))]
        for c in code:
            # Code strings are made of '0'/'1'; anything but '0' is a set bit.
            bw.writebit(c != '0')
        remaining -= 1
    # NOTE(review): the writer is never flushed or closed here -- confirm
    # that BitWriter emits a trailing partial byte on its own.
def compress(tree, uncompressed, compressed):
    '''First write the given tree to the stream 'compressed' using the
    write_tree function. Then use the same tree to encode the data
    from the input stream 'uncompressed' and write it to 'compressed'.
    If there are any partially-written bytes remaining at the end,
    write 0 bits to form a complete byte.

    Every input byte is replaced by its entry in the encoding table built
    from the tree. When the reader raises EOFError, the entry stored under
    the key None is written as the end-of-message marker and the writer
    is flushed.

    Args:
      tree: A Huffman tree.
      uncompressed: A file stream from which you can read the input.
      compressed: A file stream that will receive the tree description
        and the coded input data.
    '''
    table = huffman.make_encoding_table(tree)
    reader = BitReader(uncompressed)
    writer = BitWriter(compressed)

    # The tree goes first so a decoder can rebuild the table.
    write_tree(tree, writer)

    while True:
        # Only the read is expected to signal end of input, so keep the
        # try body to that single call.
        try:
            symbol = reader.readbits(8)
        except EOFError:
            # The table keeps the end-of-message code under the key None.
            symbol = None

        # Each table entry is an iterable of bits; write them in order.
        for bit in table[symbol]:
            writer.writebit(bit)

        if symbol is None:
            break

    # Flush the writer so any partially filled final byte is emitted.
    writer.flush()