def test_build_tree(self):
    """Build a tree from leaves weighted 1..4 and check the resulting codes."""
    leaves = [HuffmanNode(w) for w in range(1, 5)]
    root = HuffmanNode.build_tree(leaves)

    # expected values were calculated by hand
    self.assertEqual(root.weight, 10)
    expected_codes = ('110', '111', '10', '0')
    for position, expected in enumerate(expected_codes):
        self.assertEqual(leaves[position].code, expected)

    # every node we passed in must still report itself as a leaf
    for node in leaves:
        self.assertTrue(node.is_leaf)
def _unpack_data(data):
    """
    Decompress the word list and return it as un-treed huffman leaves,
    together with whatever bytes followed the zlib stream.

    :param data:    binary data as read from a file or pulled directly out
                    of a certificate extension. Data should be compressed
                    with huffman coding as described for v3 entitlement
                    certificates
    :type  data:    binary string
    :return:        tuple: (list of HuffmanNode instances not yet in a tree,
                    binary string of leftover bits that were not part of the
                    zlib-compressed word list)
    :rtype:         tuple(list, binary string)
    """
    decompress = zlib.decompressobj()
    decompressed_data = decompress.decompress(data)

    # ordered, NUL-separated list of words that will be composed into a
    # huffman tree; each word is decoded from UTF-8
    words = [word.decode('utf-8') for word in decompressed_data.split(b'\0')]

    # huffman nodes, without having put them in a tree. These will all be
    # leaves in the tree. A word's weight is its 1-based position in the list.
    nodes = [
        HuffmanNode(weight, value) for weight, value in enumerate(words, 1)
    ]
    return nodes, decompress.unused_data
def __init__(self, data):
    """
    Uncompress ``data`` and store the resulting path tree on
    ``self.path_tree`` so it can be traversed for matching paths.

    :param data:    binary data as read from a file or pulled directly out
                    of a certificate extension. Data should be compressed
                    with huffman coding as described for v3 entitlement
                    certificates
    :type  data:    binary string
    """
    # word list first: build its tree, then index each word by its code
    word_leaves, leftover = self._unpack_data(data)
    HuffmanNode.build_tree(word_leaves)
    word_dict = {leaf.code: leaf.value for leaf in word_leaves}

    # the bytes after the zlib stream describe the path nodes
    bitstream = GhettoBitStream(leftover)
    path_leaves = self._generate_path_leaves(bitstream)
    HuffmanNode.build_tree(path_leaves)
    path_dict = {leaf.code: leaf for leaf in path_leaves}

    self.path_tree = self._generate_path_tree(
        path_dict, path_leaves, word_dict, bitstream
    )
def __init__(self, data):
    """
    Turn compressed entitlement data into a path tree, kept on
    ``self.path_tree``, that can be traversed for matching paths.

    :param data:    binary data as read from a file or pulled directly out
                    of a certificate extension. Data should be compressed
                    with huffman coding as described for v3 entitlement
                    certificates
    :type  data:    binary string
    """
    words, remaining = self._unpack_data(data)
    # build_tree is called for its effect on the leaves; their codes are
    # used as lookup keys right afterwards
    HuffmanNode.build_tree(words)
    word_dict = {word.code: word.value for word in words}

    bitstream = GhettoBitStream(remaining)
    paths = self._generate_path_leaves(bitstream)
    HuffmanNode.build_tree(paths)
    path_dict = {path.code: path for path in paths}

    tree = self._generate_path_tree(path_dict, paths, word_dict, bitstream)
    self.path_tree = tree
def _generate_path_leaves(cls, bitstream):
    """
    Given the remaining bits after decompressing the word list, create the
    HuffmanNode objects that represent each node (besides root) that will
    end up in the path tree.

    :param bitstream:   stream of bits remaining after decompressing the
                        word list
    :type  bitstream:   rhsm.bitstream.GhettoBitStream
    :return:            list of HuffmanNode objects that can be used to
                        build a path tree
    :rtype:             list of HuffmanNode objects
    """
    total = cls._get_node_count(bitstream)
    # Weights run from 1 to total - 1: the root of the path tree is left
    # out because no reference code is needed for it. Each leaf gets its
    # own fresh, empty dict as its value.
    return [HuffmanNode(weight, {}) for weight in range(1, total)]
def test_root_weight(self):
    """Sanity check: the root's weight equals the sum of all leaf weights."""
    for upper in range(4, 100):
        leaves = [HuffmanNode(w) for w in range(1, upper)]
        root = HuffmanNode.build_tree(leaves)
        actual = root.weight
        expected = sum(node.weight for node in leaves)
        self.assertEqual(actual, expected)
def test_compare_eq(self):
    """The combined parent should compare equal to a node of weight 3."""
    weight_three = HuffmanNode(3)
    self.assertEqual(self.parent, weight_three)
def setUp(self):
    """Create two nodes (weights 1 and 2) and their combined parent."""
    lighter = HuffmanNode(1)
    heavier = HuffmanNode(2)
    self.node1 = lighter
    self.node2 = heavier
    self.parent = HuffmanNode.combine(lighter, heavier)