Esempio n. 1
0
    def compress(origin_filepath, compress_filepath):
        """
        Run-length encode the bits of the original file into the compressed file.

        The input is consumed one bit at a time and the lengths of the
        alternating runs are written as fixed-width counts of
        ``RunLength.encoding_length`` bits. The first count always describes
        a run of 0 bits (it is 0 when the input starts with a 1 bit). When a
        run reaches ``RunLength.max_length`` the count is written, followed by
        a zero-length run of the opposite bit, and counting resumes.

        :param origin_filepath: path of the file to compress
        :param compress_filepath: path of the file that receives the encoding
        :return: None
        """
        # Opening both files in a ``with`` statement guarantees that they are
        # closed even when reading or encoding raises.
        with open(origin_filepath, 'rb') as ori_f, \
                open(compress_filepath, 'wb') as com_f:
            with BitReader(ori_f) as reader:
                with BitWriter(com_f) as writer:
                    cnt = 0
                    old = False
                    while True:
                        b = bool(reader.read_bit())
                        # presumably ``reader.read`` is falsy once the last
                        # read hit end-of-file -- confirm against BitReader
                        if not reader.read:
                            break
                        if b != old:
                            # The run ended: emit its length and switch value.
                            writer.write_bits(cnt, RunLength.encoding_length)
                            cnt = 0
                            old = not old
                        elif cnt == RunLength.max_length:
                            writer.write_bits(cnt, RunLength.encoding_length)
                            # The opposite bit value gets a run of length 0,
                            # so counting of the current value can continue.
                            cnt = 0
                            writer.write_bits(cnt, RunLength.encoding_length)
                        cnt += 1
                    # Emit the length of the final run.
                    writer.write_bits(cnt, RunLength.encoding_length)
Esempio n. 2
0
def compress(tree, uncompressed, compressed):
    '''First write the given tree to the stream 'compressed' using the
    write_tree function. Then use the same tree to encode the data
    from the input stream 'uncompressed' and write it to 'compressed'.
    If there are any partially-written bytes remaining at the end,
    write 0 bits to form a complete byte.

    The input is read 8 bits at a time. When the reader raises EOFError,
    the code stored under the ``None`` key of the encoding table is
    written as the end-of-input marker and encoding stops.

    Args:
      tree: A Huffman tree.
      uncompressed: A file stream from which you can read the input.
      compressed: A file stream that will receive the tree description
          and the coded input data.
    '''
    table = huffman.make_encoding_table(tree)
    uncomp = BitReader(uncompressed)
    comp = BitWriter(compressed)

    write_tree(tree, comp)
    while True:
        # Only the read is expected to signal end of input, so the try body
        # is limited to it.
        try:
            uncomp_byte = uncomp.readbits(8)
        except EOFError:
            # End of input: emit the code registered for the None symbol.
            for bit in table[None]:
                comp.writebit(bit)
            break

        # Emit the code of this symbol bit by bit.
        for bit in table[uncomp_byte]:
            comp.writebit(1 if bit else 0)

    comp.flush()
 def extract(self, output_file):
     """
     Decode the compressed input in ``self.file`` and write the original
     bytes to ``output_file``.

     The input is expected to hold the serialized trie, then an 8-bit
     count of encoded characters, then the encoded bits.
     """
     reader = BitReader(self.file)
     trie_root = self._read_trie(reader)
     # the number of characters to restore is stored in an 8-bit field
     total = int(reader.readbits(8))
     writer = BitWriter(output_file)
     for _ in xrange(total):
         # walk from the root to a leaf: a set bit goes right, a clear
         # bit goes left
         current = trie_root
         while not current.is_leaf:
             current = current.right if reader.readbit() else current.left
         # the leaf holds the decoded character; emit it as 8 bits
         writer.writebits(current.char, 8)
Esempio n. 4
0
def decompress(compressed, uncompressed):
    '''First, read a Huffman tree from the 'compressed' stream using your
    read_tree function. Then use that tree to decode the rest of the
    stream and write the resulting symbols to the 'uncompressed'
    stream.

    Decoding stops when huffman.decode returns None or when the reader
    raises EOFError.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.

    '''
    comp = BitReader(compressed)
    uncomp = BitWriter(uncompressed)

    tree = read_tree(comp)

    while True:
        # Running out of input is treated exactly like decoding the
        # end-of-input symbol, without raising an exception ourselves.
        try:
            uncomp_byte = huffman.decode(tree, comp)
        except EOFError:
            uncomp_byte = None

        if uncomp_byte is None:
            # NOTE(review): a trailing byte with value 29 is appended to the
            # output here; kept as-is because callers may rely on it, but it
            # is not part of the decoded data -- confirm this is intended.
            uncomp.writebits(29, 8)
            break

        uncomp.writebits(uncomp_byte, 8)
Esempio n. 5
0
 def compress(self, output_file):
     """
     Encode the contents of ``self.file`` as fixed-width codewords and
     write them to ``output_file``.

     Every codeword is ``self.codeword_width`` bits wide. The value
     ``self.radix`` is written last as the terminating codeword, which is
     why newly assigned codes start at ``self.radix + 1``.
     """
     # seed the ternary search trie with every single-character string
     table = TernarySt()
     for value in xrange(self.radix):
         table[chr(value)] = value
     next_code = self.radix + 1
     # the whole input is loaded into memory at once
     remaining = self.file.read()
     out = BitWriter(output_file)
     while remaining:
         prefix = table.longest_prefix(remaining)
         # emit the codeword of the longest known prefix
         out.writebits(table[prefix], self.codeword_width)
         consumed = len(prefix)
         if consumed < len(remaining) and next_code < self.codeword_limit:
             # register the prefix extended by one look-ahead character
             table[remaining[:consumed + 1]] = next_code
             next_code += 1
         remaining = remaining[consumed:]
     out.writebits(self.radix, self.codeword_width)
Esempio n. 6
0
    def __init__(self, stream = None):
        '''
        Attach the structure writer to a stream.
        When no stream is supplied an internal StringIO is created;
        call finish() afterwards to obtain what was written to it.
        '''
        self.stream = StringIO() if stream is None else stream

        # Bit-level buffer; it is handed writeByte, presumably as its
        # byte sink
        self.bitter = BitWriter(self.writeByte)
Esempio n. 7
0
    def compress(origin_filepath, compress_filepath):
        """
        Huffman-encode the original file into the compressed file.

        The input is read twice: once to count symbol frequencies and once
        to encode. The output holds the serialized trie, the number of
        symbols (``Huffman.num_bit_len`` bits) and then the encoded symbols.

        :param origin_filepath: path of the file to compress
        :param compress_filepath: path of the file that receives the encoding
        :return: None
        :raises Exception: if a generated code contains a character other
            than '0' or '1'
        """
        # First pass: count how often each symbol occurs.
        freq = {}
        text_len = 0
        with open(origin_filepath, 'rb') as ori_f:
            with BitReader(ori_f) as reader:
                while True:
                    ch = reader.read_bits(Huffman.char_bit_len)
                    # presumably ``reader.read`` is falsy once the last read
                    # hit end-of-file -- confirm against BitReader
                    if not reader.read:
                        break
                    freq[ch] = freq.get(ch, 0) + 1
                    text_len += 1

        # Opening both files in a ``with`` statement guarantees that they are
        # closed even when building the trie or encoding raises.
        with open(origin_filepath, 'rb') as ori_f, \
                open(compress_filepath, 'wb') as com_f:
            with BitReader(ori_f) as reader:
                with BitWriter(com_f) as writer:
                    root = Huffman._build_trie(freq)  # build the Huffman trie
                    code_table = {}
                    # map every symbol to its code string
                    Huffman._build_code(code_table, root, '')
                    # store the trie so that it is available when expanding
                    Huffman._write_trie(root, writer)
                    # store the number of input symbols
                    writer.write_bits(text_len, Huffman.num_bit_len)

                    # Second pass: encode every symbol with its code.
                    while True:
                        ch = reader.read_bits(Huffman.char_bit_len)
                        if not reader.read:
                            break
                        for b in code_table[ch]:
                            if b == '0':
                                writer.write_bit(False)
                            elif b == '1':
                                writer.write_bit(True)
                            else:
                                raise Exception('Illegal state')
Esempio n. 8
0
 def expand(compress_filepath, origin_filepath):
     """
     Expand a run-length encoded file back into the original file.

     The input is read as a sequence of counts, each
     ``RunLength.encoding_length`` bits wide. The counts describe
     alternating runs of bits, starting with a run of 0 bits.

     :param compress_filepath: path of the compressed file
     :param origin_filepath: path of the file that receives the expansion
     :return: None
     """
     # Opening both files in a ``with`` statement guarantees that they are
     # closed even when reading or writing raises.
     with open(compress_filepath, 'rb') as com_f, \
             open(origin_filepath, 'wb') as ori_f:
         with BitReader(com_f) as reader:
             with BitWriter(ori_f) as writer:
                 b = False
                 while True:
                     cnt = reader.read_bits(RunLength.encoding_length)
                     # presumably ``reader.read`` is falsy once the last
                     # read hit end-of-file -- confirm against BitReader
                     if not reader.read:
                         break
                     for _ in range(cnt):
                         writer.write_bit(b)
                     # Runs alternate between the two bit values.
                     b = not b
    def compress(self, output_file):
        """
        Huffman-encode the data in ``self.file`` and write it to
        ``output_file``.

        The output holds the serialized trie, the number of input
        characters and then the code of every character.
        """
        # first pass: read the input one char at a time and count chars
        freq = defaultdict(int)
        length = 0
        while True:
            char = self.file.read(1)
            if not char:
                # EOF
                break
            length += 1
            freq[ord(char)] += 1

        # build Huffman trie
        root = self._build_trie(freq)
        # build symbol table for chars and their binary representation
        st = dict()
        self._build_code(st, root, '')
        bw = BitWriter(output_file)
        # write the Huffman trie binary representation to the file
        self._write_trie(root, bw)
        # write number of bytes in original uncompressed message
        # NOTE(review): the count is written into an 8-bit field, which
        # looks too small for inputs longer than 255 chars -- confirm
        bw.writebits(length, 8)
        # second pass: rewind once and read sequentially instead of seeking
        # before every single char
        self.file.seek(0)
        for _ in xrange(length):
            code = st[ord(self.file.read(1))]
            for c in code:
                # '0' is written as a clear bit, anything else as a set bit
                bw.writebit(c != '0')
Esempio n. 10
0
    def compress(origin_filepath, compress_filepath):
        """
        LZW-encode the original file into the compressed file.

        The input is read in units of ``LZW.char_bit_len`` bits, each treated
        as one character. The output is a sequence of codewords, each
        ``LZW.code_bit_len`` bits wide, terminated by the codeword
        ``LZW.char_set_len``.

        :param origin_filepath: path of the file to compress
        :param compress_filepath: path of the file that receives the encoding
        :return: None
        """
        # Seed the symbol table with every single-character string.
        st = TST()
        for i in range(LZW.char_set_len):
            st.put(chr(i), i)
        # The value char_set_len itself is reserved as the EOF codeword.
        code = LZW.char_set_len + 1

        # Opening both files in a ``with`` statement guarantees that they are
        # closed even when reading or encoding raises.
        with open(origin_filepath, 'rb') as ori_f, \
                open(compress_filepath, 'wb') as com_f:
            with BitReader(ori_f) as reader:
                with BitWriter(com_f) as writer:
                    # Load the whole input as a string, one character per
                    # unit read.
                    input_string = []
                    while True:
                        ch = reader.read_bits(LZW.char_bit_len)
                        # presumably ``reader.read`` is falsy once the last
                        # read hit end-of-file -- confirm against BitReader
                        if not reader.read:
                            break
                        input_string.append(chr(ch))
                    input_string = ''.join(input_string)

                    while len(input_string) > 0:
                        # longest prefix of the input already in the table
                        s = st.longest_prefix_of(input_string)
                        # write the codeword of that prefix
                        writer.write_bits(st.get(s), LZW.code_bit_len)
                        if len(s) < len(input_string) \
                                and code < LZW.code_set_len:
                            # Associate the prefix extended by the look-ahead
                            # character with the next free codeword.
                            st.put(input_string[:len(s) + 1], code)
                            code += 1
                        # the prefix has been consumed
                        input_string = input_string[len(s):]
                    # EOF codeword
                    writer.write_bits(LZW.char_set_len, LZW.code_bit_len)
Esempio n. 11
0
    def expand(compress_filepath, origin_filepath):
        """
        Expand an LZW-encoded file back into the original file.

        The input is read as codewords of ``LZW.code_bit_len`` bits until the
        codeword ``LZW.char_set_len`` is seen. Every decoded character is
        written as ``LZW.char_bit_len`` bits.

        :param compress_filepath: path of the compressed file
        :param origin_filepath: path of the file that receives the expansion
        :return: None
        """
        # Seed the decoding table with every single-character string.
        st = []
        for i in range(LZW.char_set_len):
            st.append(chr(i))
        # Placeholder at the index of the EOF codeword (never looked up).
        st.append('')

        # Opening both files in a ``with`` statement guarantees that they are
        # closed even when reading or decoding raises.
        with open(compress_filepath, 'rb') as com_f, \
                open(origin_filepath, 'wb') as ori_f:
            with BitReader(com_f) as reader:
                with BitWriter(ori_f) as writer:
                    codeword = reader.read_bits(LZW.code_bit_len)
                    # An immediate EOF codeword means there is nothing to
                    # decode.
                    if codeword != LZW.char_set_len:
                        val = st[codeword]
                        while True:
                            # write the current substring
                            for ch in val:
                                writer.write_bits(ord(ch), LZW.char_bit_len)
                            codeword = reader.read_bits(LZW.code_bit_len)
                            if codeword == LZW.char_set_len:
                                break
                            if len(st) == codeword:
                                # The codeword refers to the very entry that
                                # is about to be added; its look-ahead
                                # character must then be the first character
                                # of the current string (e.g. ABABABA).
                                s = val + val[0]
                            else:
                                # string associated with the codeword
                                s = st[codeword]
                            if len(st) < LZW.code_set_len:
                                st.append(val + s[0])
                            val = s
Esempio n. 12
0
    def expand(compress_filepath, origin_filepath):
        """
        Expand a Huffman-encoded file back into the original file.

        The input is expected to hold the serialized trie, then the number
        of symbols (``Huffman.num_bit_len`` bits), then the encoded symbols.
        Every decoded symbol is written as ``Huffman.char_bit_len`` bits.

        :param compress_filepath: path of the compressed file
        :param origin_filepath: path of the file that receives the expansion
        :return: None
        """
        # Opening both files in a ``with`` statement guarantees that they are
        # closed even when reading or decoding raises.
        with open(compress_filepath, 'rb') as com_f, \
                open(origin_filepath, 'wb') as ori_f:
            with BitReader(com_f) as reader:
                with BitWriter(ori_f) as writer:
                    root = Huffman._read_trie(reader)
                    text_len = reader.read_bits(Huffman.num_bit_len)
                    for _ in range(text_len):
                        # Walk from the root to a leaf: a set bit goes right,
                        # a clear bit goes left.
                        x = root
                        while not x.is_leaf():
                            if reader.read_bit():
                                x = x.right
                            else:
                                x = x.left
                        writer.write_bits(x.ch, Huffman.char_bit_len)
Esempio n. 13
0
 def compress(self, output_file):
     """
     Read everything from ``self.file``, encode it as codewords of
     ``self.codeword_width`` bits and write them to ``output_file``.

     New codes are handed out from ``self.radix + 1`` upwards while they
     stay below ``self.codeword_limit``; ``self.radix`` itself is written
     as the final codeword.
     """
     # start with a symbol table that knows all single characters
     symbols = TernarySt()
     for ordinal in xrange(self.radix):
         symbols[chr(ordinal)] = ordinal
     fresh_code = self.radix + 1
     # the complete input is held in memory
     pending = self.file.read()
     writer = BitWriter(output_file)
     while len(pending) > 0:
         match = symbols.longest_prefix(pending)
         # output the codeword that stands for the matched prefix
         writer.writebits(symbols[match], self.codeword_width)
         has_lookahead = len(match) < len(pending)
         if has_lookahead and fresh_code < self.codeword_limit:
             # remember the match plus one look-ahead character
             symbols[pending[:len(match) + 1]] = fresh_code
             fresh_code += 1
         pending = pending[len(match):]
     # terminating codeword
     writer.writebits(self.radix, self.codeword_width)
Esempio n. 14
0
class Writer(object):
    '''
    Writes packed structures, strings, single bytes and bit fields
    to a stream.
    '''

    def __init__(self, stream = None):
        '''
        Attach the writer to a stream.
        When no stream is supplied an internal StringIO is created;
        call finish() afterwards to obtain what was written to it.
        '''
        self.stream = StringIO() if stream is None else stream

        # Bit-level buffer; it is handed writeByte, presumably as its
        # byte sink
        self.bitter = BitWriter(self.writeByte)

    def writeStruct(self, format, *items):
        '''
        Pack the items with the given struct format and write the result.
        Buffered bits are flushed first.
        '''
        self.flushBits()
        self.stream.write(struct.pack(format, *items))

    def writeString(self, string):
        '''
        Write a string as UTF8, preceded by its encoded length
        packed with the "!I" struct format.
        '''
        self.flushBits()
        encoded = string.encode("utf-8")
        self.writeStruct("!I", len(encoded))
        self.stream.write(encoded)

    def writeByte(self, value):
        '''
        Write the character whose ordinal is the given value.
        '''
        character = chr(value)
        self.stream.write(character)

    def writeBits(self, bits, value):
        '''
        Hand a series of bits to the bit writer
        (see ppk.bitio.BitWriter).
        '''
        self.bitter(bits, value)

    def flushBits(self):
        '''
        Flush the bit writer's buffered bits, if any
        (see ppk.bitio.BitWriter).
        '''
        self.bitter.flush()

    def finish(self):
        '''
        finish() -> contents
        Flush buffered bits and return the stream's getvalue() result;
        intended for a StringIO-backed stream.
        '''
        self.flushBits()
        contents = self.stream.getvalue()
        return contents