Example #1
0
 def extract(self, output_file):
     """
     Restore the compressed data (LZW expansion).

     Reads fixed-width codewords (``self.codeword_width`` bits each) from
     ``self.file`` and writes the decoded strings to ``output_file``.
     Codes ``0 .. self.radix - 1`` stand for single characters, the code
     ``self.radix`` marks the end of the data, and new table entries are
     numbered from ``self.radix + 1`` up to ``self.codeword_limit``.

     Raises KeyError if a codeword refers to an entry that cannot exist yet
     (corrupt input).
     """
     # code => string table, seeded with every single-character string
     st = dict((code, chr(code)) for code in range(self.radix))
     # `i` is the last code handed out; self.radix itself is reserved as the
     # EOF signal, so the first new entry will be stored at self.radix + 1
     i = self.radix
     br = BitReader(self.file)
     codeword = int(br.readbits(self.codeword_width))
     if codeword == self.radix:
         # empty payload: the stream consists of the EOF marker only
         return
     val = st[codeword]
     while True:
         # write unpacked value
         output_file.write(val)
         codeword = int(br.readbits(self.codeword_width))
         if codeword == self.radix:
             # EOF
             break
         if codeword == i + 1 and i < self.codeword_limit:
             # Special case: the codeword names the entry that is about to be
             # created in this very iteration (e.g. input "ABABABA"). That
             # entry is val + <its own first character>, which equals val[0].
             new_value = val + val[0]
         else:
             new_value = st[codeword]
         if i < self.codeword_limit:
             i += 1
             st[i] = val + new_value[0]
         val = new_value
Example #2
0
 def extract(self, output_file):
     """
     Restore the compressed data (LZW expansion).

     Reads fixed-width codewords (``self.codeword_width`` bits each) from
     ``self.file`` and writes the decoded strings to ``output_file``.
     Codes ``0 .. self.radix - 1`` stand for single characters, the code
     ``self.radix`` marks the end of the data, and new table entries are
     numbered from ``self.radix + 1`` up to ``self.codeword_limit``.

     Raises KeyError if a codeword refers to an entry that cannot exist yet
     (corrupt input).
     """
     # code => string table, seeded with every single-character string
     st = dict((code, chr(code)) for code in range(self.radix))
     # `i` is the last code handed out; self.radix itself is reserved as the
     # EOF signal, so the first new entry will be stored at self.radix + 1
     i = self.radix
     br = BitReader(self.file)
     codeword = int(br.readbits(self.codeword_width))
     if codeword == self.radix:
         # empty payload: the stream consists of the EOF marker only
         return
     val = st[codeword]
     while True:
         # write unpacked value
         output_file.write(val)
         codeword = int(br.readbits(self.codeword_width))
         if codeword == self.radix:
             # EOF
             break
         if codeword == i + 1 and i < self.codeword_limit:
             # Special case: the codeword names the entry that is about to be
             # created in this very iteration (e.g. input "ABABABA"). That
             # entry is val + <its own first character>, which equals val[0].
             new_value = val + val[0]
         else:
             new_value = st[codeword]
         if i < self.codeword_limit:
             i += 1
             st[i] = val + new_value[0]
         val = new_value
Example #3
0
 def __init__(self, fp):
     """Open the file at path ``fp`` and load all of its bits into ``self.bits``."""
     self.f = open(fp, 'rb')
     reader = BitReader(self.f)
     self.bit_stream = reader
     collected = []
     self.bits = collected
     # Read one bit ahead; `reader.read` is checked after every read and a
     # falsy value is treated as end of input (the bit just read is dropped).
     bit = reader.read_bit()
     while reader.read:
         collected.append(bit)
         bit = reader.read_bit()
Example #4
0
def decompress(compressed, uncompressed):
    '''Decode a Huffman-compressed stream.

    A Huffman tree is first read from 'compressed' with read_tree; that
    tree is then used to decode the remainder of the stream, and every
    decoded symbol is written to 'uncompressed' as an 8-bit value.

    Decoding stops when huffman.decode returns None or when EOFError is
    raised while decoding/writing.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.

    '''
    reader = BitReader(compressed)
    writer = BitWriter(uncompressed)

    tree = read_tree(reader)

    finished = False
    while not finished:
        try:
            symbol = huffman.decode(tree, reader)
            if symbol is None:
                finished = True
            else:
                writer.writebits(symbol, 8)
        except EOFError:
            finished = True

    # NOTE(review): one extra 8-bit value (29) is emitted after the last
    # symbol. Presumably this pushes a buffered final byte out of the
    # BitWriter -- confirm against BitWriter before changing or removing it.
    writer.writebits(29, 8)
Example #5
0
    def compress(origin_filepath, compress_filepath):
        """
        Run-length encode ``origin_filepath`` into ``compress_filepath``.

        The input is read bit by bit. The output is a sequence of counts,
        each ``RunLength.encoding_length`` bits wide, giving the lengths of
        alternating runs; the first count always describes a run of 0 bits
        (and is therefore 0 when the input starts with a 1 bit).

        :param origin_filepath: path of the file to compress
        :param compress_filepath: path of the compressed file to create
        :return: None
        """
        # Both files are managed by `with` so they are closed even when
        # reading or writing fails part-way through.
        with open(origin_filepath, 'rb') as ori_f, \
                open(compress_filepath, 'wb') as com_f:
            with BitReader(ori_f) as reader:
                with BitWriter(com_f) as writer:
                    cnt = 0
                    old = False
                    while True:
                        b = bool(reader.read_bit())
                        if not reader.read:  # End-of-file?
                            break
                        if b is not old:
                            # the run ended: emit its length, start a new run
                            writer.write_bits(cnt, RunLength.encoding_length)
                            cnt = 0
                            old = not old
                        else:
                            if cnt == RunLength.max_length:
                                # The run is longer than one count can hold:
                                # emit the full count, then a zero-length run
                                # of the other bit value so that counting of
                                # the current value can simply continue.
                                writer.write_bits(cnt, RunLength.encoding_length)
                                cnt = 0
                                writer.write_bits(cnt, RunLength.encoding_length)
                        cnt += 1
                    # emit the final (still open) run
                    writer.write_bits(cnt, RunLength.encoding_length)
Example #6
0
class FileBits:
    """In-memory list of the bits of a file, comparable bit-for-bit.

    ``self.bits`` holds the values returned by ``BitReader.read_bit`` in
    file order; ``self.f`` is the underlying file object, which stays open
    until the instance is destroyed.
    """

    def __init__(self, fp):
        """Open the file at path ``fp`` and read every bit into ``self.bits``."""
        self.f = open(fp, 'rb')
        self.bit_stream = BitReader(self.f)
        self.bits = []
        while True:
            x = self.bit_stream.read_bit()
            # `read` is checked after each read; a falsy value means no bit
            # was available, so the value just returned is not stored.
            if not self.bit_stream.read:
                break
            self.bits.append(x)

    def __eq__(self, other):
        """Return True when both objects hold the same bit sequence.

        Objects without a ``bits`` attribute are not comparable; returning
        NotImplemented lets Python fall back to its default (so
        ``file_bits == 42`` is simply False instead of raising).
        """
        try:
            other_bits = other.bits
        except AttributeError:
            return NotImplemented
        # list equality already includes the length comparison
        return self.bits == other_bits

    def __repr__(self):
        """Return the bits as a string of '0' and '1' characters."""
        return ''.join('1' if x else '0' for x in self.bits)

    def __len__(self):
        """Return the number of bits read from the file."""
        return len(self.bits)

    def __del__(self):
        """Close the underlying file, if it was ever opened."""
        # __del__ also runs when __init__ failed before `self.f` was set
        # (e.g. the path does not exist), so the attribute may be missing.
        f = getattr(self, 'f', None)
        if f is not None:
            f.close()
Example #7
0
    def compress(origin_filepath, compress_filepath):
        """
        Huffman-compress ``origin_filepath`` into ``compress_filepath``.

        The input is read twice: once to count how often each
        ``Huffman.char_bit_len``-bit symbol occurs, and once to encode it.
        The output consists of the serialized trie, the number of symbols
        (``Huffman.num_bit_len`` bits) and then the code of every symbol.

        :param origin_filepath: path of the file to compress
        :param compress_filepath: path of the compressed file to create
        :return: None
        """
        # First pass: count symbol frequencies
        freq = {}
        text_len = 0
        with open(origin_filepath, 'rb') as ori_f:
            with BitReader(ori_f) as reader:
                while True:
                    ch = reader.read_bits(Huffman.char_bit_len)
                    if not reader.read:
                        break
                    freq[ch] = freq.get(ch, 0) + 1
                    text_len += 1

        # Second pass: both files are managed by `with`, like the first pass,
        # so they are closed even if building the trie or encoding fails.
        with open(origin_filepath, 'rb') as ori_f, \
                open(compress_filepath, 'wb') as com_f:
            with BitReader(ori_f) as reader:
                with BitWriter(com_f) as writer:
                    root = Huffman._build_trie(freq)  # build the Huffman trie
                    code_table = {}
                    # fill the symbol => code-string table
                    Huffman._build_code(code_table, root, '')
                    # store the trie in the output; needed for expansion
                    Huffman._write_trie(root, writer)
                    # store the number of input symbols
                    writer.write_bits(text_len, Huffman.num_bit_len)

                    # encode the input with the Huffman codes
                    while True:
                        ch = reader.read_bits(Huffman.char_bit_len)
                        if not reader.read:
                            break
                        code = code_table[ch]
                        for b in code:
                            if b == '0':
                                writer.write_bit(False)
                            elif b == '1':
                                writer.write_bit(True)
                            else:
                                raise Exception('Illegal state')
Example #8
0
def compress(tree, uncompressed, compressed):
    '''Huffman-encode a stream with a given tree.

    The tree is written to 'compressed' with write_tree, followed by the
    code of every byte read from 'uncompressed' and finally the code
    stored under the key None in the encoding table (the end-of-input
    marker). The bit writer is flushed at the end.

    Every byte read and every code written is also printed to stdout.

    Args:
      tree: A Huffman tree.
      uncompressed: A file stream from which you can read the input.
      compressed: A file stream that will receive the tree description
          and the coded input data.
    '''
    encoding = huffman.make_encoding_table(tree)
    reader = BitReader(uncompressed)
    writer = BitWriter(compressed)

    write_tree(tree, writer)

    at_end = False
    while not at_end:
        try:
            symbol = reader.readbits(8)
            print(symbol)

            path = encoding[symbol]

            # path entries equal to False/True become 0/1 bits; anything
            # else is skipped
            for step in path:
                if step == False:
                    writer.writebit(0)
                elif step == True:
                    writer.writebit(1)
            print(path)

        except EOFError:
            # input exhausted: emit the end-of-input code and stop
            path = encoding[None]
            print("EOF")

            for step in path:
                writer.writebit(step)
            print(path)
            at_end = True

    writer.flush()
 def extract(self, output_file):
     """
     Restore the compressed data (Huffman expansion).

     Reads the trie and an 8-bit symbol count from ``self.file``, then
     decodes that many symbols and writes each one to ``output_file`` as
     an 8-bit value.
     """
     reader = BitReader(self.file)
     trie = self._read_trie(reader)
     # how many symbols follow
     count = int(reader.readbits(8))
     writer = BitWriter(output_file)
     for _ in xrange(count):
         # walk from the root to a leaf: a set bit goes right, a clear bit left
         current = trie
         while not current.is_leaf:
             current = current.right if reader.readbit() else current.left
         # the leaf carries the decoded character
         writer.writebits(current.char, 8)
Example #10
0
    def __init__(self, string = None, file = None):
        '''
        Wrap a string or file stream in a structure reader.
        Either string or file must be given; string wins if both are.

        Raises TypeError if neither string nor file is given.
        '''
        if string is not None:
            self.stream = StringIO(string)
        elif file is not None:
            self.stream = file
        else:
            # Raising a bare string is not a valid exception: the interpreter
            # turns it into a TypeError with an unrelated message. Raise that
            # same exception type explicitly, with the intended message.
            raise TypeError("string or file must be non-None")

        # Bit reader buffer
        self.bitter = BitReader(self.readByte)
Example #11
0
 def expand(compress_filepath, origin_filepath):
     """
     Expand the run-length encoded ``compress_filepath`` into ``origin_filepath``.

     The input is read as a sequence of ``RunLength.encoding_length``-bit
     counts describing alternating runs, the first one being a run of 0 bits.

     :param compress_filepath: path of the compressed file to read
     :param origin_filepath: path of the expanded file to create
     :return: None
     """
     # Both files are managed by `with` so they are closed even when
     # reading or writing fails part-way through.
     with open(compress_filepath, 'rb') as com_f, \
             open(origin_filepath, 'wb') as ori_f:
         with BitReader(com_f) as reader:
             with BitWriter(ori_f) as writer:
                 b = False
                 while True:
                     cnt = reader.read_bits(RunLength.encoding_length)
                     if not reader.read:  # End-of-file?
                         break
                     # write a run of `cnt` bits, then switch bit value
                     for _ in range(cnt):
                         writer.write_bit(b)
                     b = not b
Example #12
0
    def compress(origin_filepath, compress_filepath):
        """
        LZW-compress ``origin_filepath`` into ``compress_filepath``.

        The output is a sequence of ``LZW.code_bit_len``-bit codewords that
        ends with the codeword ``LZW.char_set_len`` (the EOF marker).

        :param origin_filepath: path of the file to compress
        :param compress_filepath: path of the compressed file to create
        :return: None
        """
        st = TST()
        for i in range(LZW.char_set_len):
            st.put(chr(i), i)
        # the value char_set_len itself is reserved as the EOF codeword
        code = LZW.char_set_len + 1

        # Both files are managed by `with` so they are closed even when
        # reading or writing fails part-way through.
        with open(origin_filepath, 'rb') as ori_f, \
                open(compress_filepath, 'wb') as com_f:
            with BitReader(ori_f) as reader:
                with BitWriter(com_f) as writer:
                    # Treat every char_bit_len-bit unit as one character, so
                    # that arbitrary (binary) files can be processed.
                    input_string = []
                    while True:
                        ch = reader.read_bits(LZW.char_bit_len)
                        if not reader.read:
                            break
                        input_string.append(chr(ch))
                    input_string = ''.join(input_string)

                    while len(input_string) > 0:
                        # longest table entry that is a prefix of the input
                        s = st.longest_prefix_of(input_string)
                        # emit the codeword of s
                        writer.write_bits(st.get(s),
                                          LZW.code_bit_len)
                        if len(s) < len(input_string) and code < LZW.code_set_len:
                            # associate (longest prefix + lookahead character)
                            # with the next free code and add it to the table
                            st.put(input_string[:len(s) + 1], code)
                            code += 1
                        input_string = input_string[len(s):]  # s is consumed
                    # emit the EOF codeword
                    writer.write_bits(LZW.char_set_len, LZW.code_bit_len)
Example #13
0
    def expand(compress_filepath, origin_filepath):
        """
        Expand the LZW-compressed ``compress_filepath`` into ``origin_filepath``.

        The input is read as ``LZW.code_bit_len``-bit codewords until the
        EOF codeword ``LZW.char_set_len`` is seen; every decoded character is
        written as ``LZW.char_bit_len`` bits.

        :param compress_filepath: path of the compressed file to read
        :param origin_filepath: path of the expanded file to create
        :return: None
        """
        # codeword => string table, seeded with the single characters
        st = [chr(i) for i in range(LZW.char_set_len)]
        # placeholder for the EOF codeword so list indices line up with codes
        st.append('')

        # Both files are managed by `with` so they are closed even when
        # reading or writing fails part-way through.
        with open(compress_filepath, 'rb') as com_f, \
                open(origin_filepath, 'wb') as ori_f:
            with BitReader(com_f) as reader:
                with BitWriter(ori_f) as writer:
                    codeword = reader.read_bits(LZW.code_bit_len)
                    if codeword != LZW.char_set_len:  # not an empty payload
                        val = st[codeword]
                        while True:
                            for ch in val:  # write the current string
                                writer.write_bits(ord(ch), LZW.char_bit_len)
                            codeword = reader.read_bits(LZW.code_bit_len)
                            if codeword == LZW.char_set_len:
                                break
                            if len(st) == codeword:
                                # The codeword names the entry that is about
                                # to be added. In that case the lookahead
                                # character must be the first character of
                                # the current string (think of "ABABABA").
                                s = val + val[0]
                            else:
                                # string associated with the codeword
                                s = st[codeword]
                            if len(st) < LZW.code_set_len:
                                st.append(val + s[0])
                            val = s
Example #14
0
    def expand(compress_filepath, origin_filepath):
        """
        Expand the Huffman-compressed ``compress_filepath`` into ``origin_filepath``.

        Reads the trie and the symbol count (``Huffman.num_bit_len`` bits)
        from the input, then decodes that many symbols and writes each one
        as ``Huffman.char_bit_len`` bits.

        :param compress_filepath: path of the compressed file to read
        :param origin_filepath: path of the expanded file to create
        :return: None
        """
        # Both files are managed by `with` so they are closed even when
        # reading or writing fails part-way through.
        with open(compress_filepath, 'rb') as com_f, \
                open(origin_filepath, 'wb') as ori_f:
            with BitReader(com_f) as reader:
                with BitWriter(ori_f) as writer:
                    root = Huffman._read_trie(reader)
                    text_len = reader.read_bits(Huffman.num_bit_len)
                    for _ in range(text_len):
                        # walk from the root to a leaf: a set bit goes
                        # right, a clear bit goes left
                        x = root
                        while not x.is_leaf():
                            if reader.read_bit():
                                x = x.right
                            else:
                                x = x.left
                        writer.write_bits(x.ch, Huffman.char_bit_len)
Example #15
0
class Reader(object):
    '''
    Structure stream reader.

    Wraps a byte stream and offers helpers for reading raw bytes,
    struct-packed values, length-prefixed UTF-8 strings and bit fields.
    '''

    def __init__(self, string = None, file = None):
        '''
        Wrap a string or file stream in a structure reader.
        Either string or file must be given; string wins if both are.

        Raises TypeError if neither string nor file is given.
        '''
        if string is not None:
            self.stream = StringIO(string)
        elif file is not None:
            self.stream = file
        else:
            # Raising a bare string is not a valid exception: the interpreter
            # turns it into a TypeError with an unrelated message. Raise that
            # same exception type explicitly, with the intended message.
            raise TypeError("string or file must be non-None")

        # Bit reader buffer
        self.bitter = BitReader(self.readByte)

    def read(self, size):
        '''
        read(size) -> string
        Read bytes into a raw string.
        May return fewer than size bytes if the stream runs out.
        '''
        return self.stream.read(size)

    def readStruct(self, format):
        '''
        readStruct(format) -> tuple
        Read a structure from the stream.
        Raises FormatError() if bytes were unavailable.
        '''
        size = struct.calcsize(format)
        data = self.read(size)
        if len(data) != size:
            raise FormatError()
        return struct.unpack(format, data)

    def readString(self):
        '''
        readString() -> string
        Read a UTF8-encoded string from the stream.
        The string is preceded by its byte length as a 32-bit unsigned
        integer in network byte order.
        Raises FormatError() if bytes were unavailable.
        '''
        size = self.readStruct("!I")[0]
        data = self.read(size)
        if len(data) != size:
            raise FormatError()
        return data.decode("utf-8")

    def readByte(self):
        '''
        readByte() -> int
        Read a single byte.
        '''
        # NOTE(review): at end of stream read(1) returns an empty value and
        # ord() raises TypeError rather than FormatError -- confirm whether
        # BitReader relies on that before changing it.
        return ord(self.read(1))

    def readBits(self, bits):
        '''
        Read bits (see ppk.bitio.BitReader).
        '''
        # NOTE(review): the BitReader instance is called directly here, so it
        # is presumably callable -- verify against ppk.bitio.BitReader.
        return self.bitter(bits)

    def skipBits(self):
        '''
        Skip buffered bits (see ppk.bitio.BitReader).
        '''
        self.bitter.skip()