def compress(origin_filepath, compress_filepath):
    """
    Run-length encode the bits of the origin file into the compressed file.

    The input is consumed one bit at a time. The output is a sequence of run
    lengths, each written with ``RunLength.encoding_length`` bits. Runs
    alternate between 0-bits and 1-bits and the first run counts 0-bits, so
    an input starting with a 1-bit produces a leading run of length 0.

    :param origin_filepath: path of the file to compress
    :param compress_filepath: path of the compressed file to write
    :return: None
    """
    # ``with`` closes both files even when reading or writing raises.
    with open(origin_filepath, 'rb') as ori_f, open(compress_filepath, 'wb') as com_f:
        with BitReader(ori_f) as reader, BitWriter(com_f) as writer:
            cnt = 0      # length of the run currently being counted
            old = False  # bit value of the run currently being counted
            while True:
                b = bool(reader.read_bit())
                if not reader.read:  # End-of-file?
                    break
                if b != old:
                    # The run ended: emit its length and start the other one.
                    writer.write_bits(cnt, RunLength.encoding_length)
                    cnt = 0
                    old = not old
                elif cnt == RunLength.max_length:
                    # The run is too long for one count: emit it, then emit a
                    # zero-length run of the other bit value so counting of
                    # the same bit value can continue.
                    writer.write_bits(cnt, RunLength.encoding_length)
                    cnt = 0
                    writer.write_bits(cnt, RunLength.encoding_length)
                cnt += 1
            # Emit the final (possibly empty) run.
            writer.write_bits(cnt, RunLength.encoding_length)
def decompress(compressed, uncompressed):
    '''First, read a Huffman tree from the 'compressed' stream using your
    read_tree function. Then use that tree to decode the rest of the
    stream and write the resulting symbols to the 'uncompressed' stream.

    Decoding stops when huffman.decode returns None or when the input
    raises EOFError. Only decoded symbols are written; no extra byte is
    appended when decoding stops.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.
    '''
    comp = BitReader(compressed)
    uncomp = BitWriter(uncompressed)
    tree = read_tree(comp)
    while True:
        try:
            symbol = huffman.decode(tree, comp)
        except EOFError:
            # The input ended before a None symbol was decoded.
            break
        if symbol is None:
            break
        uncomp.writebits(symbol, 8)
def compress(origin_filepath, compress_filepath):
    """
    Huffman-compress the origin file into the compressed file.

    The origin file is read twice: once to count how often each
    ``Huffman.char_bit_len``-bit symbol occurs, and once to encode it.
    The output holds the serialized trie, then the number of symbols
    written with ``Huffman.num_bit_len`` bits, then the code of every symbol.

    :param origin_filepath: path of the file to compress
    :param compress_filepath: path of the compressed file to write
    :return: None
    :raises Exception: if a generated code contains a character other than
        '0' or '1'
    """
    # First pass: count symbol frequencies.
    freq = {}
    text_len = 0
    with open(origin_filepath, 'rb') as ori_f:
        with BitReader(ori_f) as reader:
            while True:
                ch = reader.read_bits(Huffman.char_bit_len)
                if not reader.read:
                    break
                freq[ch] = freq.get(ch, 0) + 1
                text_len += 1

    # ``with`` closes both files even when building or writing raises.
    with open(origin_filepath, 'rb') as ori_f, open(compress_filepath, 'wb') as com_f:
        with BitReader(ori_f) as reader, BitWriter(com_f) as writer:
            root = Huffman._build_trie(freq)  # build the Huffman trie
            code_table = {}
            Huffman._build_code(code_table, root, '')  # symbol -> code string
            Huffman._write_trie(root, writer)  # stored so expansion can rebuild it
            writer.write_bits(text_len, Huffman.num_bit_len)  # input length

            # Second pass: encode every symbol with its Huffman code.
            while True:
                ch = reader.read_bits(Huffman.char_bit_len)
                if not reader.read:
                    break
                for b in code_table[ch]:
                    if b == '0':
                        writer.write_bit(False)
                    elif b == '1':
                        writer.write_bit(True)
                    else:
                        raise Exception('Illegal state')
def compress(tree, uncompressed, compressed):
    '''First write the given tree to the stream 'compressed' using the
    write_tree function. Then use the same tree to encode the data from
    the input stream 'uncompressed' and write it to 'compressed'.
    If there are any partially-written bytes remaining at the end,
    write 0 bits to form a complete byte.

    Input is read 8 bits at a time. When the reader raises EOFError, the
    code stored under the key None in the encoding table is written and
    the writer is flushed.

    Args:
      tree: A Huffman tree.
      uncompressed: A file stream from which you can read the input.
      compressed: A file stream that will receive the tree description
          and the coded input data.
    '''
    table = huffman.make_encoding_table(tree)
    uncomp = BitReader(uncompressed)
    comp = BitWriter(compressed)
    write_tree(tree, comp)

    def write_path(path):
        # Write every bit of one encoded symbol.
        for bit in path:
            comp.writebit(1 if bit else 0)

    while True:
        # Only the read is guarded, so an error raised while encoding is
        # never mistaken for end of input.
        try:
            symbol = uncomp.readbits(8)
        except EOFError:
            write_path(table[None])
            break
        write_path(table[symbol])
    comp.flush()
def extract(self, output_file):
    """
    Restore the compressed data.

    Reads the trie from ``self.file``, then a symbol count, then decodes
    that many symbols by walking the trie one bit at a time and writes
    each decoded character to ``output_file``.

    :param output_file: destination handed to ``BitWriter``
    """
    br = BitReader(self.file)
    root = self._read_trie(br)
    # number of bytes to write
    # NOTE(review): the count is read with the same width (8) as a single
    # character; if that width is in bits the count cannot exceed 255.
    # Confirm against the matching compress routine.
    length = int(br.readbits(8))
    bw = BitWriter(output_file)
    # decode using the Huffman trie
    # ``range`` is used because ``xrange`` does not exist on Python 3.
    for _ in range(length):
        node = root
        while not node.is_leaf:
            # a set bit descends right, a clear bit descends left
            node = node.right if br.readbit() else node.left
        # write the character to output
        bw.writebits(node.char, 8)
def expand(compress_filepath, origin_filepath):
    """
    Expand a run-length encoded file back into the origin file.

    The input is read as a sequence of run lengths, each
    ``RunLength.encoding_length`` bits wide. Runs alternate between 0-bits
    and 1-bits, and the first run is made of 0-bits.

    :param compress_filepath: path of the compressed file to read
    :param origin_filepath: path of the expanded file to write
    :return: None
    """
    # ``with`` closes both files even when reading or writing raises.
    with open(compress_filepath, 'rb') as com_f, open(origin_filepath, 'wb') as ori_f:
        with BitReader(com_f) as reader, BitWriter(ori_f) as writer:
            b = False  # bit value of the run being written
            while True:
                cnt = reader.read_bits(RunLength.encoding_length)
                if not reader.read:  # End-of-file?
                    break
                for _ in range(cnt):
                    writer.write_bit(b)
                b = not b
def compress(self, output_file):
    """
    Compress data from input file and write compressed data to the
    output file.

    Each longest known prefix of the remaining input is replaced by its
    codeword, written with ``self.codeword_width`` bits. New prefixes are
    registered until ``self.codeword_limit`` is reached. The value
    ``self.radix`` is written last as a terminating codeword.

    :param output_file: destination handed to ``BitWriter``
    """
    # create a ternary search trie and fill it with single ASCII characters
    st = TernarySt()
    # ``range`` is used because ``xrange`` does not exist on Python 3.
    for i in range(self.radix):
        st[chr(i)] = i
    # ``self.radix`` itself is written as the final codeword below, so new
    # codes start one past it
    code = self.radix + 1
    # read all the data from the input file (not optimal, but easy to code)
    data = self.file.read()
    bw = BitWriter(output_file)
    while data:
        lp = st.longest_prefix(data)
        # write the value of the prefix to output
        bw.writebits(st[lp], self.codeword_width)
        if len(lp) < len(data) and code < self.codeword_limit:
            # add new prefix (matched prefix + one lookahead character)
            # to the symbol table
            st[data[:len(lp) + 1]] = code
            code += 1
        data = data[len(lp):]
    bw.writebits(self.radix, self.codeword_width)
def compress(origin_filepath, compress_filepath):
    """
    LZW-compress the origin file into the compressed file.

    The whole input is read as ``LZW.char_bit_len``-bit characters. Each
    longest known prefix of the remaining input is replaced by its code,
    written with ``LZW.code_bit_len`` bits, and new prefixes are registered
    until ``LZW.code_set_len`` is reached. The code ``LZW.char_set_len`` is
    written last to mark end of file.

    :param origin_filepath: path of the file to compress
    :param compress_filepath: path of the compressed file to write
    :return: None
    """
    st = TST()
    for i in range(LZW.char_set_len):
        st.put(chr(i), i)
    code = LZW.char_set_len + 1  # char_set_len itself is reserved as the EOF code

    # ``with`` closes both files even when reading or writing raises.
    with open(origin_filepath, 'rb') as ori_f, open(compress_filepath, 'wb') as com_f:
        with BitReader(ori_f) as reader, BitWriter(com_f) as writer:
            # Treat every char_bit_len-bit group as one character, so any
            # file can be handled.
            chars = []
            while True:
                ch = reader.read_bits(LZW.char_bit_len)
                if not reader.read:
                    break
                chars.append(chr(ch))
            input_string = ''.join(chars)

            while input_string:
                s = st.longest_prefix_of(input_string)  # longest known prefix
                writer.write_bits(st.get(s), LZW.code_bit_len)  # emit its code
                if len(s) < len(input_string) and code < LZW.code_set_len:
                    # Register (longest prefix + lookahead character) under
                    # the next free code.
                    st.put(input_string[:len(s) + 1], code)
                    code += 1
                input_string = input_string[len(s):]  # s has been consumed
            writer.write_bits(LZW.char_set_len, LZW.code_bit_len)  # EOF code
def expand(compress_filepath, origin_filepath):
    """
    Expand an LZW-compressed file back into the origin file.

    Codes are read with ``LZW.code_bit_len`` bits each until the code
    ``LZW.char_set_len`` (end of file) is seen. The string table is rebuilt
    while decoding and stops growing at ``LZW.code_set_len`` entries.

    :param compress_filepath: path of the compressed file to read
    :param origin_filepath: path of the expanded file to write
    :return: None
    """
    # Initialise the table with every single character.
    st = [chr(i) for i in range(LZW.char_set_len)]
    st.append('')  # (unused) placeholder at the index of the EOF code

    # ``with`` closes both files even when reading or writing raises.
    with open(compress_filepath, 'rb') as com_f, open(origin_filepath, 'wb') as ori_f:
        with BitReader(com_f) as reader, BitWriter(ori_f) as writer:
            codeword = reader.read_bits(LZW.code_bit_len)
            if codeword == LZW.char_set_len:  # immediate end of file
                return
            val = st[codeword]
            while True:
                for ch in val:  # write out the current string
                    writer.write_bits(ord(ch), LZW.char_bit_len)
                codeword = reader.read_bits(LZW.code_bit_len)
                if codeword == LZW.char_set_len:
                    break
                if len(st) == codeword:
                    # The code refers to the very entry that is about to be
                    # added; its lookahead character must then be the first
                    # character of the current string (think of ABABABA).
                    s = val + val[0]
                else:
                    s = st[codeword]  # string associated with this code
                if len(st) < LZW.code_set_len:
                    st.append(val + s[0])
                val = s
def compress(self, output_file):
    """
    Compress data from input file and write compressed data to the
    output file.

    ``self.file`` is read twice: once to count character frequencies and
    once to encode. The output holds the trie, then the character count,
    then the code bits of every character.

    :param output_file: destination handed to ``BitWriter``
    """
    # read input and count chars
    freq = defaultdict(int)
    length = 0
    while True:
        # read one char from a file
        char = self.file.read(1)
        if not char:
            # EOF
            break
        length += 1
        freq[ord(char)] += 1

    # build Huffman trie
    root = self._build_trie(freq)
    # build symbol table for chars and their binary representation
    st = dict()
    self._build_code(st, root, '')

    bw = BitWriter(output_file)
    # write the Huffman trie binary representation to the file
    self._write_trie(root, bw)
    # write number of bytes in original uncompressed message
    # NOTE(review): the count is written with the same width (8) as used
    # elsewhere for a single character; if that width is in bits the count
    # cannot exceed 255. Confirm against the matching extract routine.
    bw.writebits(length, 8)

    # use Huffman code to encode input
    # Rewind once and read sequentially instead of seeking before every
    # character; ``range`` is used because ``xrange`` does not exist on
    # Python 3.
    self.file.seek(0)
    for _ in range(length):
        code = st[ord(self.file.read(1))]
        for c in code:
            # '0' is written as a clear bit, anything else as a set bit
            bw.writebit(c != '0')
def expand(compress_filepath, origin_filepath):
    """
    Expand a Huffman-compressed file back into the origin file.

    Reads the trie, then the symbol count (``Huffman.num_bit_len`` bits),
    then decodes that many symbols by walking the trie one bit at a time.
    Each decoded symbol is written with ``Huffman.char_bit_len`` bits.

    :param compress_filepath: path of the compressed file to read
    :param origin_filepath: path of the expanded file to write
    :return: None
    """
    # ``with`` closes both files even when reading or writing raises.
    with open(compress_filepath, 'rb') as com_f, open(origin_filepath, 'wb') as ori_f:
        with BitReader(com_f) as reader, BitWriter(ori_f) as writer:
            root = Huffman._read_trie(reader)
            text_len = reader.read_bits(Huffman.num_bit_len)
            for _ in range(text_len):
                x = root
                while not x.is_leaf():
                    # a set bit descends right, a clear bit descends left
                    x = x.right if reader.read_bit() else x.left
                writer.write_bits(x.ch, Huffman.char_bit_len)