Example #1
0
def main():
	symb = [(.07,'a'), (.09,'b'),(.12,'c'),(.22,'d'),(.23,'e'),(.27,'f')]

	tree = huff.makeHuff(symb)
	
	if tree.left.left != 'd' or \
	tree.left.right != 'e' or \
	tree.right.left != 'f' or \
	tree.right.right.left !='c' or \
	tree.right.right.right.left !='a' or \
	tree.right.right.right.right != 'b':
		passed = False
	else:
		passed = True

	testPass("makeHuff",passed)
	passed = True

	result =  huff.encode(tree)

	expected = {'a':'1110','c':'110','b':'1111','e':'01','d':'00','f':'10'}

	if result != expected:
		passed = False
	
	print "symb | prob | code"
	for i in sorted(symb):
		print i[1], "   |", (i[0]), "| ",  result[i[1]]

	testPass("encode",passed)
Example #2
0
    def test_same(self):
        """Round-trip a sequence consisting of one repeated symbol.

        The sequence is encoded with the coding built from its own symbol
        counts, and decoding the joined output must give the sequence back.
        """
        from collections import Counter

        seq = [0] * 16
        coding, root = huff.generate_coding(Counter(seq))

        bits = "".join(huff.encode(seq, coding))
        decoded = huff.decode(bits, root)

        self.assertEqual(list(seq), decoded)
Example #3
0
    def test_mix(self):
        """Check the generated coding and the round trip for mixed symbols.

        Symbols 1 and 2 occur five times, 3 four times, 4 and 5 three times.
        The sequence is shuffled before counting, the coding table is
        compared with a fixed expectation, and decoding the encoded
        sequence must give the shuffled sequence back.
        """
        from collections import Counter

        seq = [1] * 5 + [2] * 5 + [3] * 4 + [4] * 3 + [5] * 3
        random.shuffle(seq)

        expect = {
            1: "10",
            2: "01",
            3: "11",
            4: "001",
            5: "000",
        }
        coding, root = huff.generate_coding(Counter(seq))
        self.assertDictEqual(expect, coding)

        bits = "".join(huff.encode(seq, coding))
        decoded = huff.decode(bits, root)
        self.assertEqual(seq, decoded)
Example #4
0
def compress(f, compf, block=None):
    """
    Compress a file by bwt-mtf-huff processes.

    f: a seekable file-like object holding the content to compress. It is
       read once in full to collect the alphabet, rewound with seek(0),
       and then read again in chunks of BLOCK_SIZE.
    compf: file-like object receiving the result: the pickled alphabet,
       followed for every compressed block by a ">2I" header (nbits, I),
       the frequency table written by dump_freqs, and the bytes returned
       by tobytes.
    block: None to compress every block, or the zero-based index of the
       single block to compress. When an index is given, the frequency
       table and the coding of that block are printed as well.
    """
    # Create the alphabet for move-to-front from every distinct symbol.
    content = f.read()
    alphabet = [''] + list(set(content))
    # Dump the alphabet (pickle protocol 2) ahead of the block data.
    cPickle.dump(alphabet, compf, 2)
    content = None  # drop the reference to the whole content before chunked reading
    f.seek(0)

    # `block` may legitimately be 0, so test against None instead of relying
    # on truthiness; otherwise selecting block 0 would skip the debug output.
    single_block = block is not None

    count = 0
    while True:
        data = f.read(BLOCK_SIZE)
        if not data:
            # An empty read marks end of input.
            break
        if not single_block or block == count:
            print("block %r:" % count)
            # NOTE(review): I is presumably the BWT primary index needed for
            # decoding -- confirm against bwt.encode.
            bwt_encode, I = bwt.encode(data)
            mtf_encode = mtf.encode(alphabet, bwt_encode)
            # Create the Huffman tree from the move-to-front symbol counts.
            freqs = Counter(mtf_encode)
            if single_block:
                print(freqs)
            coding, root = huff.generate_coding(freqs)
            if single_block:
                print(coding)
            # Encoding: huff.encode output is joined into one string whose
            # length is recorded as nbits in the block header.
            huff_encode = ''.join(huff.encode(mtf_encode, coding))
            nbits = len(huff_encode)
            huff_bytes = tobytes(huff_encode)

            compf.write(struct.pack(">2I", nbits, I))
            dump_freqs(compf, freqs)
            compf.write(huff_bytes)

            print("nbits = %r, I = %r, length = %r" % (nbits, I, len(data)))
        count += 1
Example #5
0
def main():
    symb = [(.07, 'a'), (.09, 'b'), (.12, 'c'), (.22, 'd'), (.23, 'e'),
            (.27, 'f')]

    tree = huff.makeHuff(symb)

    if tree.left.left != 'd' or \
    tree.left.right != 'e' or \
    tree.right.left != 'f' or \
    tree.right.right.left !='c' or \
    tree.right.right.right.left !='a' or \
    tree.right.right.right.right != 'b':
        passed = False
    else:
        passed = True

    testPass("makeHuff", passed)
    passed = True

    result = huff.encode(tree)

    expected = {
        'a': '1110',
        'c': '110',
        'b': '1111',
        'e': '01',
        'd': '00',
        'f': '10'
    }

    if result != expected:
        passed = False

    print "symb | prob | code"
    for i in sorted(symb):
        print i[1], "   |", (i[0]), "| ", result[i[1]]

    testPass("encode", passed)