예제 #1
0
def compress(f, compf, block = None):
    """
    Compress a file by bwt-mtf-huff processes.

    f: a seekable file-like object, content for compressing. It is read
       once in full to build the alphabet, rewound with seek(0), and then
       read again in BLOCK_SIZE chunks.
    compf: writable file-like object receiving the compression result.
    block: optional block index. When None (the default) every block is
       compressed. Otherwise only the block whose index equals `block` is
       compressed and written, and its frequency table and coding are
       printed as well.

    Output layout: the pickled alphabet (protocol 2), then for each
    compressed block a ">2I" header holding (nbits, I), the frequencies as
    written by dump_freqs, and the packed Huffman bytes.
    """
    # create alphabet for move-to-front
    content = f.read()
    alphabet = [''] + list(set(content))
    # dump alphabet
    cPickle.dump(alphabet, compf, 2)
    content = None  # release the whole-file copy before block processing
    f.seek(0)

    # Extra diagnostics are wanted whenever a specific block was requested.
    # A plain truthiness test would wrongly treat block 0 as "not requested".
    verbose = block is not None

    count = 0
    while True:
        data = f.read(BLOCK_SIZE)
        if not data:
            # empty read means end of input
            break
        if block is None or block == count:
            # Single-argument print(...) emits the same text as the
            # Python 2 print statement.
            print("block %r:" % count)
            bwt_encode, I = bwt.encode(data)
            mtf_encode = mtf.encode(alphabet, bwt_encode)
            # create Huffman tree
            freqs = Counter(mtf_encode)
            if verbose:
                print(freqs)
            coding, root = huff.generate_coding(freqs)
            if verbose:
                print(coding)
            # encoding
            huff_encode = ''.join(huff.encode(mtf_encode, coding))
            # nbits is the length of the joined code string; it is stored
            # in the block header next to the BWT index I.
            nbits = len(huff_encode)
            huff_bytes = tobytes(huff_encode)

            compf.write(struct.pack(">2I", nbits, I))
            dump_freqs(compf, freqs)
            compf.write(huff_bytes)

            print("nbits = %r, I = %r, length = %r" % (nbits, I, len(data)))
        count += 1
예제 #2
0
 def test_mix(self):
     """Round-trip the output of generator(16*1024) through bwt.encode/bwt.decode."""
     original = generator(16 * 1024)
     transformed, primary_index = bwt.encode(original)
     # NOTE(review): meaning of the third positional argument (True) is
     # defined by bwt.decode, which is not visible here.
     restored = bwt.decode(transformed, primary_index, True)
     self.assertEqual(list(original), restored)
예제 #3
0
 def test_encode(self):
     """Check bwt.encode on 'abraca' when an explicit `indexes` list is supplied."""
     expected_symbols = list('caraab')
     transformed, primary_index = bwt.encode(
         'abraca', indexes=[5, 0, 3, 1, 4, 2])
     self.assertEqual(expected_symbols, transformed)
     self.assertEqual(primary_index, 1)