コード例 #1
0
ファイル: mtf_test.py プロジェクト: EasyHard/pybwt-compress
 def test_both(self):
     """Check that mtf.decode undoes mtf.encode over ten generated sequences."""
     # The alphabet does not change between rounds, so build it once.
     alphabet = string.ascii_uppercase + string.digits
     for _ in xrange(10):
         original = generator(1024*16, alphabet)
         encoded = mtf.encode(alphabet, original)
         round_trip = mtf.decode(alphabet, encoded)
         # decode is compared against a list, so normalise the input too.
         self.assertEqual(round_trip, list(original))
コード例 #2
0
ファイル: bwtzip.py プロジェクト: EasyHard/pybwt-compress
def compress(f, compf, block = None):
    """
    Compress a file with the BWT -> move-to-front -> Huffman pipeline.

    f: a seekable file-like object holding the content to compress. It is
       read once in full to collect the alphabet, rewound with seek(0),
       and then read again in chunks of BLOCK_SIZE.
    compf: a writable file-like object that receives the compressed result.
    block: optional zero-based block index. When None (the default) every
       block is compressed. Otherwise only the block with that index is
       compressed and written, and its frequency table and Huffman coding
       are printed as extra debugging output.

    Layout written to compf: the pickled alphabet (pickle protocol 2),
    followed, for each compressed block, by a ">2I" header (number of
    Huffman bits, then the value I returned by bwt.encode), the data
    written by dump_freqs, and the packed Huffman bytes.

    Progress is reported on stdout. print(...) is called with a single
    argument so the output is the same under Python 2 and Python 3.
    """
    # Create the alphabet for move-to-front from every distinct symbol of
    # the input. The full content is not bound to a name, so it can be
    # released as soon as the set has been built.
    # NOTE(review): the leading '' entry is kept from the original code;
    # presumably it stands for a marker symbol produced by bwt.encode --
    # confirm against the bwt module.
    alphabet = [''] + list(set(f.read()))
    # Dump the alphabet ahead of the per-block data.
    cPickle.dump(alphabet, compf, 2)
    f.seek(0)

    # Compare against None explicitly: block 0 is a valid selection and
    # must not be mistaken for "no block selected".
    single_block = block is not None

    count = 0
    while True:
        data = f.read(BLOCK_SIZE)
        if not data:
            break
        index = count
        count += 1
        if single_block and block != index:
            # Blocks that were not asked for are skipped but still counted.
            continue

        print("block %r:" % index)
        bwt_encode, I = bwt.encode(data)
        mtf_encode = mtf.encode(alphabet, bwt_encode)

        # Create the Huffman tree from the symbol frequencies of this block.
        freqs = Counter(mtf_encode)
        if single_block:
            print(freqs)
        coding, root = huff.generate_coding(freqs)
        if single_block:
            print(coding)

        # Encode into a string of bits, then pack it into bytes. nbits is
        # stored in the header alongside I, ahead of the packed payload.
        huff_encode = ''.join(huff.encode(mtf_encode, coding))
        nbits = len(huff_encode)
        huff_bytes = tobytes(huff_encode)

        compf.write(struct.pack(">2I", nbits, I))
        dump_freqs(compf, freqs)
        compf.write(huff_bytes)

        print("nbits = %r, I = %r, length = %r" % (nbits, I, len(data)))
コード例 #3
0
ファイル: mtf_test.py プロジェクト: EasyHard/pybwt-compress
 def test_encode(self):
     """Check mtf.encode on a fixed input over the alphabet 'abcr'."""
     expected = [2, 1, 3, 1, 0, 3]
     self.assertEqual(mtf.encode('abcr', 'caraab'), expected)