def extract(self, output_file):
    """
    Restore the compressed data

    Reads codewords of ``self.codeword_width`` bits from ``self.file`` and
    writes the string each codeword stands for to ``output_file``. The
    codeword table is rebuilt while decoding: it starts with one entry per
    single character (codes ``0 .. self.radix - 1``) and grows by one entry
    per decoded codeword until ``self.codeword_limit`` is reached.

    The code ``self.radix`` is reserved as the end-of-data marker; decoding
    stops as soon as it is read (including when it is the very first
    codeword, in which case nothing is written).

    :param output_file: writable object; receives the decoded strings via
        ``write()``.
    """
    # codeword => string table, seeded with every single-character string
    st = dict()
    for code in xrange(self.radix):
        st[code] = chr(code)

    # Highest code handed out so far. self.radix itself is the EOF signal,
    # so the first learned entry is stored at self.radix + 1.
    last_code = self.radix

    br = BitReader(self.file)
    codeword = int(br.readbits(self.codeword_width))
    if codeword == self.radix:
        # Stream holds nothing but the EOF marker.
        return
    val = st[codeword]

    while True:
        # write unpacked value
        output_file.write(val)

        codeword = int(br.readbits(self.codeword_width))
        if codeword == self.radix:
            # EOF
            break

        table_open = last_code < self.codeword_limit
        if table_open and codeword == last_code + 1:
            # Special case: the codeword refers to the entry that is about
            # to be added below, so it is not in the table yet. Such an
            # entry always starts with the previous string and ends with
            # that string's first character.
            new_value = val + val[0]
        else:
            new_value = st[codeword]

        if table_open:
            last_code += 1
            st[last_code] = val + new_value[0]

        val = new_value
def __init__(self, fp):
    """
    Open ``fp`` in binary mode and load every bit of it into ``self.bits``.

    The open file is kept in ``self.f`` and the BitReader wrapped around it
    in ``self.bit_stream``. Bits are read one at a time until the reader's
    ``read`` attribute turns falsy after a ``read_bit()`` call; the value
    returned by that last call is discarded.

    :param fp: path of the file to load.
    """
    self.f = open(fp, 'rb')
    self.bit_stream = BitReader(self.f)
    self.bits = []

    stream = self.bit_stream
    collected = self.bits

    bit = stream.read_bit()
    while stream.read:
        collected.append(bit)
        bit = stream.read_bit()
def decompress(compressed, uncompressed):
    '''First, read a Huffman tree from the 'compressed' stream using your
    read_tree function. Then use that tree to decode the rest of the
    stream and write the resulting symbols to the 'uncompressed' stream.

    Decoding stops when huffman.decode returns None or when the input runs
    out (EOFError). Only decoded symbols are written; no extra byte is
    appended at the end. The bit writer is flushed before returning.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.
    '''
    reader = BitReader(compressed)
    writer = BitWriter(uncompressed)
    tree = read_tree(reader)

    while True:
        try:
            symbol = huffman.decode(tree, reader)
        except EOFError:
            # Input ended without a decoded end marker; treat it as the
            # end of the data, as the previous implementation did.
            break
        if symbol is None:
            # None signals the end of the encoded data.
            break
        writer.writebits(symbol, 8)

    writer.flush()
def compress(origin_filepath, compress_filepath):
    """
    Compress the original file into the compressed file.

    The input is read bit by bit and written out as a sequence of run
    lengths, each ``RunLength.encoding_length`` bits wide. Runs alternate
    between 0-bits and 1-bits, starting with a run of 0-bits (which has
    length 0 when the input starts with a 1-bit). A run longer than
    ``RunLength.max_length`` is split by inserting a zero-length run of the
    other bit value.

    Both files are closed even if reading or writing raises.

    :param origin_filepath: path of the original file
    :param compress_filepath: path of the compressed file to write
    :return: nothing
    """
    with open(origin_filepath, 'rb') as ori_f, \
            open(compress_filepath, 'wb') as com_f:
        with BitReader(ori_f) as reader, BitWriter(com_f) as writer:
            cnt = 0        # length of the run currently being counted
            old = False    # bit value of the run currently being counted
            while True:
                b = bool(reader.read_bit())
                if not reader.read:  # End-of-file?
                    break
                if b != old:
                    # The run ended: emit its length, start the other run.
                    writer.write_bits(cnt, RunLength.encoding_length)
                    cnt = 0
                    old = not old
                elif cnt == RunLength.max_length:
                    # Counter is full: emit it, then emit a zero-length run
                    # of the other bit value so counting of the current
                    # run can continue.
                    writer.write_bits(cnt, RunLength.encoding_length)
                    cnt = 0
                    writer.write_bits(cnt, RunLength.encoding_length)
                cnt += 1
            # Emit the final (possibly empty) run.
            writer.write_bits(cnt, RunLength.encoding_length)
class FileBits:
    """
    The content of a file as a list of bits.

    The constructor reads the whole file through a BitReader and stores
    every bit in ``self.bits``. Instances compare equal when their bit
    lists are equal, ``len()`` gives the number of bits and ``repr()``
    gives them as a string of '0' and '1' characters.
    """

    def __init__(self, fp):
        """
        Open ``fp`` in binary mode and load all of its bits.

        The file stays open in ``self.f`` after a successful load (it is
        closed by ``__del__``); if loading raises, the file is closed
        before the exception propagates.

        :param fp: path of the file to load.
        """
        self.f = open(fp, 'rb')
        self.bits = []
        try:
            self.bit_stream = BitReader(self.f)
            while True:
                x = self.bit_stream.read_bit()
                # ``read`` turns falsy once a read_bit() call found no more
                # data; the value returned by that call is not a real bit.
                if not self.bit_stream.read:
                    break
                self.bits.append(x)
        except Exception:
            self.f.close()
            raise

    def __eq__(self, other):
        """Return True when ``other`` holds exactly the same bits."""
        try:
            other_bits = other.bits
        except AttributeError:
            # Not something that carries bits: let Python try the
            # reflected comparison instead of raising.
            return NotImplemented
        return self.bits == other_bits

    def __repr__(self):
        """Return the bits as a string of '0' and '1' characters."""
        return ''.join('1' if x else '0' for x in self.bits)

    def __len__(self):
        """Return the number of bits."""
        return len(self.bits)

    def __del__(self):
        """Close the underlying file, if one was opened."""
        # ``f`` is missing when open() failed in __init__ or when the
        # instance was created without running __init__.
        f = getattr(self, 'f', None)
        if f is not None:
            f.close()
def compress(origin_filepath, compress_filepath):
    """
    Compress the original file into the compressed file.

    The input is read twice, ``Huffman.char_bit_len`` bits at a time:
    the first pass counts how often each symbol occurs, the second pass
    writes each symbol's Huffman code. The output consists of the trie
    (written by ``Huffman._write_trie``), the number of symbols as a
    ``Huffman.num_bit_len``-bit value, and then the codes.

    All files are closed even if reading or writing raises.

    :param origin_filepath: path of the original file
    :param compress_filepath: path of the compressed file to write
    :return: nothing
    """
    # Pass 1: count symbol frequencies.
    freq = {}
    text_len = 0
    with open(origin_filepath, 'rb') as ori_f:
        with BitReader(ori_f) as reader:
            while True:
                ch = reader.read_bits(Huffman.char_bit_len)
                if not reader.read:
                    break
                freq[ch] = freq.get(ch, 0) + 1
                text_len += 1

    with open(origin_filepath, 'rb') as ori_f, \
            open(compress_filepath, 'wb') as com_f:
        with BitReader(ori_f) as reader, BitWriter(com_f) as writer:
            root = Huffman._build_trie(freq)  # build the Huffman trie
            code_table = {}
            # Fill the symbol => code mapping ('0'/'1' strings).
            Huffman._build_code(code_table, root, '')
            # Store the trie in the output; it is needed for expanding.
            Huffman._write_trie(root, writer)
            # Store the number of input symbols.
            writer.write_bits(text_len, Huffman.num_bit_len)

            # Pass 2: encode the input with the Huffman codes.
            while True:
                ch = reader.read_bits(Huffman.char_bit_len)
                if not reader.read:
                    break
                for b in code_table[ch]:
                    if b == '0':
                        writer.write_bit(False)
                    elif b == '1':
                        writer.write_bit(True)
                    else:
                        raise Exception('Illegal state')
def compress(tree, uncompressed, compressed):
    '''First write the given tree to the stream 'compressed' using the
    write_tree function. Then use the same tree to encode the data from
    the input stream 'uncompressed' and write it to 'compressed'.
    If there are any partially-written bytes remaining at the end,
    write 0 bits to form a complete byte.

    The input is read 8 bits at a time. When the reader raises EOFError
    the code stored under the key None in the encoding table is written
    as the end marker. Nothing is printed while compressing.

    Args:
      tree: A Huffman tree.
      uncompressed: A file stream from which you can read the input.
      compressed: A file stream that will receive the tree description
          and the coded input data.
    '''
    table = huffman.make_encoding_table(tree)
    reader = BitReader(uncompressed)
    writer = BitWriter(compressed)
    write_tree(tree, writer)

    def emit(symbol):
        # Write the code of one symbol, bit by bit.
        for bit in table[symbol]:
            writer.writebit(1 if bit else 0)

    while True:
        try:
            byte = reader.readbits(8)
        except EOFError:
            # End of input: finish with the end marker's code.
            emit(None)
            break
        emit(byte)

    writer.flush()
def extract(self, output_file):
    """
    Restore the compressed data

    Reads the trie from ``self.file`` with ``self._read_trie``, then an
    8-bit count of symbols, and then decodes that many symbols by walking
    the trie one bit at a time (set bit: right child, cleared bit: left
    child). Each decoded symbol is written to ``output_file`` as 8 bits.
    """
    reader = BitReader(self.file)
    trie_root = self._read_trie(reader)

    # number of bytes to write
    symbol_count = int(reader.readbits(8))

    writer = BitWriter(output_file)

    # decode using the Huffman trie
    for _ in xrange(symbol_count):
        current = trie_root
        while not current.is_leaf:
            current = current.right if reader.readbit() else current.left
        # write the character to output
        writer.writebits(current.char, 8)
def __init__(self, string = None, file = None):
    '''
    Wrap a string or file stream in a structure reader.
    Either string or file must be given; string wins when both are.

    Raises TypeError if neither string nor file is given.
    '''
    if string is not None:
        self.stream = StringIO(string)
    elif file is not None:
        self.stream = file
    else:
        # A plain string cannot be raised; use a real exception type so
        # the message actually reaches the caller.
        raise TypeError("string or file must be non-None")

    # Bit reader buffer
    self.bitter = BitReader(self.readByte)
def expand(compress_filepath, origin_filepath):
    """
    Expand the compressed file and write the result to the original file.

    The input is read as a sequence of run lengths, each
    ``RunLength.encoding_length`` bits wide. Runs alternate between 0-bits
    and 1-bits, starting with 0-bits; for every run that many bits of the
    current value are written.

    Both files are closed even if reading or writing raises.

    :param compress_filepath: path of the compressed file
    :param origin_filepath: path of the file to write the expanded data to
    :return: nothing
    """
    with open(compress_filepath, 'rb') as com_f, \
            open(origin_filepath, 'wb') as ori_f:
        with BitReader(com_f) as reader, BitWriter(ori_f) as writer:
            b = False  # bit value of the current run
            while True:
                cnt = reader.read_bits(RunLength.encoding_length)
                if not reader.read:  # End-of-file?
                    break
                for _ in range(cnt):
                    writer.write_bit(b)
                b = not b
def compress(origin_filepath, compress_filepath):
    """
    Compress the original file into the compressed file.

    The whole input is read ``LZW.char_bit_len`` bits at a time and turned
    into a string. Repeatedly, the longest prefix of the remaining input
    that is in the symbol table is replaced by its code
    (``LZW.code_bit_len`` bits), and that prefix plus the next character is
    added to the table under the next free code while codes below
    ``LZW.code_set_len`` remain. The code ``LZW.char_set_len`` is written
    last as the end-of-file marker.

    Both files are closed even if reading or writing raises.

    :param origin_filepath: path of the original file
    :param compress_filepath: path of the compressed file to write
    :return: nothing
    """
    st = TST()
    for i in range(LZW.char_set_len):
        st.put(chr(i), i)
    # LZW.char_set_len itself is kept free as the EOF code.
    code = LZW.char_set_len + 1

    with open(origin_filepath, 'rb') as ori_f, \
            open(compress_filepath, 'wb') as com_f:
        with BitReader(ori_f) as reader, BitWriter(com_f) as writer:
            # Treat each group of char_bit_len bits as one character, so
            # any kind of file can be handled.
            chars = []
            while True:
                ch = reader.read_bits(LZW.char_bit_len)
                if not reader.read:
                    break
                chars.append(chr(ch))
            input_string = ''.join(chars)

            while input_string:
                s = st.longest_prefix_of(input_string)  # longest known prefix
                # Write the code of s to the compressed file.
                writer.write_bits(st.get(s), LZW.code_bit_len)
                if len(s) < len(input_string) and code < LZW.code_set_len:
                    # Associate (longest prefix + lookahead character)
                    # with the next code and add it to the symbol table.
                    st.put(input_string[:len(s) + 1], code)
                    code += 1
                input_string = input_string[len(s):]  # s has been consumed

            writer.write_bits(LZW.char_set_len, LZW.code_bit_len)  # EOF code
def expand(compress_filepath, origin_filepath):
    """
    Expand the compressed file and write the result to the original file.

    Codewords of ``LZW.code_bit_len`` bits are read until the code
    ``LZW.char_set_len`` (end-of-file marker) is met. Each codeword is
    replaced by the string it stands for, written out one character per
    ``LZW.char_bit_len`` bits. The codeword table is rebuilt while
    expanding and grows until it holds ``LZW.code_set_len`` entries.

    Both files are closed even if reading or writing raises.

    :param compress_filepath: path of the compressed file
    :param origin_filepath: path of the file to write the expanded data to
    :return: nothing
    """
    # Seed the table with all single characters.
    st = [chr(i) for i in range(LZW.char_set_len)]
    # Placeholder for the EOF code (never looked up); it keeps the index
    # of the next entry in step with the next code to be assigned.
    st.append('')

    with open(compress_filepath, 'rb') as com_f, \
            open(origin_filepath, 'wb') as ori_f:
        with BitReader(com_f) as reader, BitWriter(ori_f) as writer:
            codeword = reader.read_bits(LZW.code_bit_len)
            if codeword == LZW.char_set_len:
                # The data consists of the EOF code only.
                return
            val = st[codeword]

            while True:
                # Write out the current string.
                for ch in val:
                    writer.write_bits(ord(ch), LZW.char_bit_len)

                codeword = reader.read_bits(LZW.code_bit_len)
                if codeword == LZW.char_set_len:
                    break

                if len(st) == codeword:
                    # The codeword is exactly the entry that is about to
                    # be added. In that case the lookahead character must
                    # be the first character of the current string (think
                    # of the input ABABABA).
                    s = val + val[0]
                else:
                    s = st[codeword]  # string associated with the codeword

                if len(st) < LZW.code_set_len:
                    st.append(val + s[0])
                val = s
def expand(compress_filepath, origin_filepath):
    """
    Expand the compressed file and write the result to the original file.

    Reads the trie with ``Huffman._read_trie``, then the number of symbols
    (``Huffman.num_bit_len`` bits), and then decodes that many symbols by
    walking the trie one bit at a time (set bit: right child, cleared bit:
    left child). Each decoded symbol is written as
    ``Huffman.char_bit_len`` bits.

    Both files are closed even if reading or writing raises.

    :param compress_filepath: path of the compressed file
    :param origin_filepath: path of the file to write the expanded data to
    :return: nothing
    """
    with open(compress_filepath, 'rb') as com_f, \
            open(origin_filepath, 'wb') as ori_f:
        with BitReader(com_f) as reader, BitWriter(ori_f) as writer:
            root = Huffman._read_trie(reader)
            text_len = reader.read_bits(Huffman.num_bit_len)
            for _ in range(text_len):
                x = root
                while not x.is_leaf():
                    x = x.right if reader.read_bit() else x.left
                writer.write_bits(x.ch, Huffman.char_bit_len)
class Reader(object):
    '''
    Structure stream reader.
    '''

    def __init__(self, string = None, file = None):
        '''
        Wrap a string or file stream in a structure reader.
        Either string or file must be given; string wins when both are.

        Raises TypeError if neither string nor file is given.
        '''
        if string is not None:
            self.stream = StringIO(string)
        elif file is not None:
            self.stream = file
        else:
            # A plain string cannot be raised; use a real exception type
            # so the message actually reaches the caller.
            raise TypeError("string or file must be non-None")

        # Bit reader buffer
        self.bitter = BitReader(self.readByte)

    def read(self, size):
        '''
        read(size) -> string

        Read bytes into a raw string.
        '''
        return self.stream.read(size)

    def readStruct(self, format):
        '''
        readStruct(format) -> tuple

        Read a structure from the stream.
        Raises FormatError() if bytes were unavailable.
        '''
        size = struct.calcsize(format)
        data = self.read(size)
        if len(data) != size:
            raise FormatError()
        return struct.unpack(format, data)

    def readString(self):
        '''
        readString() -> string

        Read a UTF8-encoded string from the stream. The string is
        preceded by its length in bytes, stored as a "!I" structure.
        Raises FormatError() if bytes were unavailable.
        '''
        size = self.readStruct("!I")[0]
        data = self.read(size)
        if len(data) != size:
            raise FormatError()
        return data.decode("utf-8")

    def readByte(self):
        '''
        readByte() -> int

        Read a single byte.
        '''
        return ord(self.read(1))

    def readBits(self, bits):
        '''
        Read bits (see ppk.bitio.BitReader).
        '''
        return self.bitter(bits)

    def skipBits(self):
        '''
        Skip buffered bits (see ppk.bitio.BitReader).
        '''
        self.bitter.skip()