def test_get_codes(self, d):
    """The total weighted code length must not depend on dictionary order.

    The sum of len(code) * frequency over every symbol is optimal, so
    building the tree from a permuted copy of the same frequency
    dictionary has to give exactly the same total.
    """
    # NB: huffman_tree is exercised indirectly here as well
    codes = get_codes(huffman_tree(d))

    # Same key/value pairs, inserted in a random order.
    pairs = list(d.items())
    shuffle(pairs)
    permuted = dict(pairs)
    permuted_codes = get_codes(huffman_tree(permuted))

    cost = sum(d[sym] * len(codes[sym]) for sym in d)
    permuted_cost = sum(
        permuted[sym] * len(permuted_codes[sym]) for sym in permuted
    )
    self.assertEqual(cost, permuted_cost)
def test_avg_length(self, d):
    """avg_length should return a float in the interval [0, 8].

    d is the frequency dictionary from which the tree is built.
    """
    # NB: this also tests huffman_tree indirectly
    t = huffman_tree(d)
    f = avg_length(t, d)
    # Dedicated assert methods report the offending value on failure,
    # unlike assertTrue, which only says "False is not true".
    self.assertIsInstance(f, float)
    self.assertGreaterEqual(f, 0)
    self.assertLessEqual(f, 8.0)
def test_round_trip(self, b):
    """Test inverting generate_compressed with generate_uncompressed.

    Compressing b and then uncompressing the result, given the same
    tree and the original length, must reproduce b exactly.
    """
    freq = make_freq_dict(b)
    # Only inputs whose frequency dictionary has more than one entry
    # are checked; assume() discards the rest.
    assume(len(freq) > 1)
    tree = huffman_tree(freq)
    codes = get_codes(tree)
    compressed = generate_compressed(b, codes)
    uncompressed = generate_uncompressed(tree, compressed, len(b))
    # assertEqual (not a bare assert) matches the sibling tests, still
    # runs under `python -O`, and shows both values on failure.
    self.assertEqual(b, uncompressed)
def test_number_nodes(self, d):
    """An interior root must carry the number (symbol count - 2).

    A complete tree has one fewer interior node than it has leaves,
    and numbering starts from 0, so the root's number is two less than
    the number of symbols in the frequency dictionary.
    """
    # NB: huffman_tree is exercised indirectly here as well
    root = huffman_tree(d)
    # The property is only stated for trees whose root is not a leaf.
    assume(not root.is_leaf())
    symbol_count = len(d)
    number_nodes(root)
    self.assertEqual(symbol_count, root.number + 2)
def test_num_nodes_to_bytes(self, b):
    """num_nodes_to_bytes returns a bytes object of length 1.

    A single byte suffices since the number of internal nodes cannot
    exceed 256.
    """
    # NB: also indirectly tests make_freq_dict and huffman_tree
    d = make_freq_dict(b)
    # Only inputs whose frequency dictionary has more than one entry
    # are checked; assume() discards the rest.
    assume(len(d) > 1)
    t = huffman_tree(d)
    number_nodes(t)
    n = num_nodes_to_bytes(t)
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(n, bytes)
    self.assertEqual(len(n), 1)
def test_generate_compressed(self, b):
    """generate_compressed returns bytes no longer than its input.

    The size of the compressed object should also be invariant under
    permuting the input bytes.
    """
    # NB: this also indirectly tests make_freq_dict, huffman_tree,
    # and get_codes
    codes = get_codes(huffman_tree(make_freq_dict(b)))
    compressed = generate_compressed(b, codes)
    # Dedicated assert methods show the offending values on failure.
    self.assertIsInstance(compressed, bytes)
    self.assertLessEqual(len(compressed), len(b))

    # Compress a random permutation of the same bytes. The original
    # input is left bound to `b` rather than being overwritten.
    symbols = list(b)
    shuffle(symbols)
    permuted = bytes(symbols)
    permuted_codes = get_codes(huffman_tree(make_freq_dict(permuted)))
    compressed2 = generate_compressed(permuted, permuted_codes)
    self.assertEqual(len(compressed2), len(compressed))
def test_tree_to_bytes(self, b):
    """tree_to_bytes generates a bytes representation of a post-order
    traversal of a tree's internal nodes.
    """
    # Each internal node takes 4 bytes to represent, and there is one
    # fewer internal node than there are distinct symbols, so the
    # output must be 4 * (len(frequency dictionary) - 1) bytes long.
    # NB: make_freq_dict, huffman_tree and number_nodes are exercised
    # indirectly here as well
    freq = make_freq_dict(b)
    # Only inputs whose frequency dictionary has more than one entry
    # are checked; assume() discards the rest.
    assume(len(freq) > 1)
    root = huffman_tree(freq)
    number_nodes(root)
    serialized = tree_to_bytes(root)
    expected_length = 4 * (len(freq) - 1)
    self.assertEqual(expected_length, len(serialized))
def test_huffman_tree(self, d):
    """huffman_tree returns a non-leaf HuffmanNode.

    d is the frequency dictionary passed to huffman_tree.
    """
    t = huffman_tree(d)
    # Dedicated assert methods give a descriptive message on failure,
    # unlike assertTrue around a hand-built boolean expression.
    self.assertIsInstance(t, HuffmanNode)
    self.assertFalse(t.is_leaf())