class TestMerkleTree2(unittest.TestCase):
    """
    Test MerkleTree behavior with deeper directories.

    Test data is written under tmp/ and is not cleaned up by tearDown.
    """

    def setUp(self):
        # Random names and data come from a generator seeded with the clock.
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    def _unused_tmp_path(self, name_len):
        """
        Return a path under tmp/ that does not exist yet.

        name_len is passed straight to self.rng.next_file_name(); new
        names are drawn until one is found that is not already present.
        """
        path = os.path.join('tmp', self.rng.next_file_name(name_len))
        while os.path.exists(path):
            path = os.path.join('tmp', self.rng.next_file_name(name_len))
        return path

    def do_test_deepish_trees(self, hashtype):
        """
        Build a directory of random data, then its MerkleTree, then
        round trip to a serialization and back.

        The tree is rebuilt three ways -- from the serialized string, from
        the list of its lines, and from a file holding the string -- and
        each rebuilt tree must compare equal to the original.
        """
        tree_top = self._unused_tmp_path(MAX_NAME_LEN)

        # Generate a quasi-random data directory, 7 deep, up to 5 files/dir
        self.rng.next_data_dir(tree_top, depth=7, width=5, max_len=4096)

        # Build a MerkleTree specifying the directory.
        tree = MerkleTree.create_from_file_system(tree_top, hashtype)

        # ROUND TRIP 1 ----------------------------------------------
        # Serialize it.
        ser = str(tree)

        # Deserialize to make another MerkleTree.
        tree2 = MerkleTree.create_from_serialization(ser, hashtype)
        # Use the == operator rather than calling __eq__ directly: a bare
        # __eq__ call may return NotImplemented, which assertTrue would
        # accept as a pass.
        self.assertTrue(tree2 == tree)
        self.assertEqual(tree2, tree)       # identical test

        # ROUND TRIP 2 ----------------------------------------------
        # Drop the empty string that split() leaves after the last newline.
        strings = ser.split('\n')
        strings = strings[:-1]
        tree3 = MerkleTree.create_from_string_array(strings, hashtype)
        self.assertEqual(tree3, tree)

        # ROUND TRIP 3 ----------------------------------------------
        filename = self._unused_tmp_path(8)
        with open(filename, 'w') as handle:
            handle.write(ser)

        tree4 = MerkleTree.create_from_file(filename, hashtype)
        self.assertEqual(tree4, tree)

    def test_deepish_trees(self):
        """ Test behavior of deeper trees using various SHA hash types. """
        for hashtype in HashTypes:
            # subTest labels a failure with the hash type that caused it
            # and lets the remaining hash types still run.
            with self.subTest(hashtype=hashtype):
                self.do_test_deepish_trees(hashtype)
class TestMerkleTree2(unittest.TestCase):
    """
    Test MerkleTree behavior with deeper directories.

    Test data is written under tmp/ and is not cleaned up by tearDown.
    """

    def setUp(self):
        # Random names and data come from a generator seeded with the clock.
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    def _unused_tmp_path(self, name_len):
        """
        Return a path under tmp/ that does not exist yet.

        name_len is passed straight to self.rng.next_file_name(); new
        names are drawn until one is found that is not already present.
        """
        path = os.path.join('tmp', self.rng.next_file_name(name_len))
        while os.path.exists(path):
            path = os.path.join('tmp', self.rng.next_file_name(name_len))
        return path

    def do_test_deepish_trees(self, hashtype):
        """
        Build a directory of random data, then its MerkleTree, then
        round trip to a serialization and back.

        The tree is rebuilt three ways -- from the serialized string, from
        the list of its lines, and from a file holding the string -- and
        each rebuilt tree must compare equal to the original.
        """
        tree_top = self._unused_tmp_path(MAX_NAME_LEN)

        # Generate a quasi-random data directory, 7 deep, up to 5 files/dir
        self.rng.next_data_dir(tree_top, depth=7, width=5, max_len=4096)

        # Build a MerkleTree specifying the directory.
        tree = MerkleTree.create_from_file_system(tree_top, hashtype)

        # ROUND TRIP 1 ----------------------------------------------
        # Serialize it.
        ser = str(tree)

        # Deserialize to make another MerkleTree.
        tree2 = MerkleTree.create_from_serialization(ser, hashtype)
        # Use the == operator rather than calling __eq__ directly: a bare
        # __eq__ call may return NotImplemented, which assertTrue would
        # accept as a pass.
        self.assertTrue(tree2 == tree)
        self.assertEqual(tree2, tree)       # identical test

        # ROUND TRIP 2 ----------------------------------------------
        # Drop the empty string that split() leaves after the last newline.
        strings = ser.split('\n')
        strings = strings[:-1]
        tree3 = MerkleTree.create_from_string_array(strings, hashtype)
        self.assertEqual(tree3, tree)

        # ROUND TRIP 3 ----------------------------------------------
        filename = self._unused_tmp_path(8)
        with open(filename, 'w') as handle:
            handle.write(ser)

        tree4 = MerkleTree.create_from_file(filename, hashtype)
        self.assertEqual(tree4, tree)

    def test_deepish_trees(self):
        """ Test behavior of deeper trees using various SHA hash types. """
        for hashtype in HashTypes:
            # subTest labels a failure with the hash type that caused it
            # and lets the remaining hash types still run.
            with self.subTest(hashtype=hashtype):
                self.do_test_deepish_trees(hashtype)
class TestMerkleTree(unittest.TestCase):
    """
    Test package functionality at the Tree level.

    Generated test directories live under tmp/ and are not removed by
    tearDown.
    """

    def setUp(self):
        # Random names and data come from a generator seeded with the clock.
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions ---------------------------------------------

    def get_two_unique_directory_names(self):
        """ Make two different quasi-random directory names."""
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Make a directory tree with a specific name, depth and width.

        Anything already present at tmp/<name> (file or directory) is
        removed first.  Returns the path to the new directory.
        """
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Create two test directories with different names.

        Returns (dir_name1, dir_path1, dir_name2, dir_path2).
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    @staticmethod
    def _new_hasher(hashtype):
        """
        Return a fresh hash object for hashtype.

        Raises NotImplementedError for any hash type not handled here.
        """
        if hashtype == HashTypes.SHA1:
            return XLSHA1()
        if hashtype == HashTypes.SHA2:
            return XLSHA2()
        if hashtype == HashTypes.SHA3:
            return XLSHA3()
        if hashtype == HashTypes.BLAKE2B:
            return XLBLAKE2B_256()
        raise NotImplementedError

    def verify_leaf_sha(self, node, path_to_file, hashtype):
        """
        Verify a leaf node is hashed correctly, using a specific SHA hash
        type: the digest of the file's bytes must equal node.bin_hash.
        """
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as handle:
            data = handle.read()
        self.assertFalse(data is None)
        sha = self._new_hasher(hashtype)
        sha.update(data)
        hash_ = sha.digest()
        self.assertEqual(hash_, node.bin_hash)

    def verify_tree_sha(self, node, path_to_node, hashtype):
        """
        Verify tree elements are hashed correctly, assuming that the node
        is a MerkleTree, using a specific SHA hash type.

        Children are verified recursively; the tree's own bin_hash must be
        the digest over the bin_hash of every child that has one, or None
        when no child has a hash.
        """
        if node.nodes is None:
            self.assertEqual(None, node.bin_hash)
            return

        hash_count = 0
        sha = self._new_hasher(hashtype)
        for node_ in node.nodes:
            path_to_file = os.path.join(path_to_node, node_.name)
            if isinstance(node_, MerkleLeaf):
                self.verify_leaf_sha(node_, path_to_file, hashtype)
            elif isinstance(node_, MerkleTree):
                self.verify_tree_sha(node_, path_to_file, hashtype)
            else:
                self.fail("unknown node type!")
            if node_.bin_hash is not None:
                hash_count += 1
                sha.update(node_.bin_hash)

        # take care to compare values of the same type: both sides here
        # are binary digests, not hex strings
        if hash_count == 0:
            self.assertEqual(None, node.bin_hash)
        else:
            self.assertEqual(sha.digest(), node.bin_hash)

    def _check_bound_tree(self, dir_name, dir_path, hashtype, node_count):
        """
        Build a MerkleTree from dir_path and check its name, the number of
        top-level nodes, and its hashes.  Returns the tree.
        """
        tree = MerkleTree.create_from_file_system(dir_path, hashtype)
        self.assertEqual(dir_name, tree.name)
        nodes = tree.nodes
        self.assertTrue(nodes is not None)
        self.assertEqual(node_count, len(nodes))
        self.verify_tree_sha(tree, dir_path, hashtype)
        return tree

    def _check_round_trips(self, serialization, hashtype, line_count):
        """
        Rebuild a tree from serialization both as a list of lines and as a
        single string; each must serialize back to the same text and the
        two trees must be equal.  Returns the list of lines.
        """
        # create from string array ----------------------------------
        # drop the empty string split() leaves after the final newline
        lines = serialization.split('\n')
        lines = lines[:-1]
        self.assertEqual(line_count, len(lines))

        tree2 = MerkleTree.create_from_string_array(lines, hashtype)
        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(serialization, hashtype)
        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        self.assertTrue(tree1 == tree2)
        return lines

    def _check_first_line(self, first_line, pattern):
        """
        Check that pattern matches first_line with an empty indent and
        that hash and directory name rejoin to the original line.
        """
        match_ = pattern.match(first_line)
        self.assertTrue(match_ is not None)
        self.assertEqual(match_.group(1), '')               # indent
        tree_hash = match_.group(2)
        dir_name = match_.group(3)
        self.assertEqual(tree_hash + ' ' + dir_name, first_line)

    # unit tests ----------------------------------------------------

    def test_pathless_unbound(self):
        """
        Test basic characteristics of very simple MerkleTrees created
        using our standard SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2,
                      HashTypes.SHA3, HashTypes.BLAKE2B]:
            with self.subTest(hashtype=using):
                self.do_test_pathless_unbound(using)

    def do_test_pathless_unbound(self, hashtype):
        """
        Test basic characteristics of very simple MerkleTrees created
        using a specific SHA hash type.
        """
        (dir_name1, dir_name2) = self.get_two_unique_directory_names()

        check_hashtype(hashtype)
        tree1 = MerkleTree(dir_name1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        if hashtype == HashTypes.SHA1:
            self.assertEqual(SHA1_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.SHA2:
            self.assertEqual(SHA2_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.SHA3:
            self.assertEqual(SHA3_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.BLAKE2B:
            # Same member name that test_pathless_unbound passes in.
            self.assertEqual(BLAKE2B_256_HEX_NONE, tree1.hex_hash)
        else:
            raise NotImplementedError

        tree2 = MerkleTree(dir_name2, hashtype)
        self.assertEqual(dir_name2, tree2.name)

        # these tests remain skimpy
        self.assertFalse(tree1 is None)
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)

        # there should be no indent on the first line
        self.assertFalse(tree1_str[0] == ' ')

        # no extra lines should be added
        lines = tree1_str.split('\n')
        # a trailing line terminator makes split() yield one extra
        # empty string; discard it before counting
        if lines[-1] == '':
            lines = lines[:-1]
        self.assertEqual(1, len(lines))

        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    def test_bound_flat_dirs(self):
        """
        Test handling of flat directories with a few data files using
        various SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            with self.subTest(hashtype=using):
                self.do_test_bound_flat_dirs(using)

    def do_test_bound_flat_dirs(self, hashtype):
        """test directory is single level, with four data files"""
        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(ONE, FOUR)
        tree1 = self._check_bound_tree(dir_name1, dir_path1, hashtype, FOUR)
        tree2 = self._check_bound_tree(dir_name2, dir_path2, hashtype, FOUR)

        self.assertFalse(tree1 is None)
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)
        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    def test_bound_needle_dirs(self):
        """
        Test directories four deep with various SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            with self.subTest(hashtype=using):
                self.do_test_bound_needle_dirs(using)

    def do_test_bound_needle_dirs(self, hashtype):
        """test directories four deep with one data file at the lowest level"""
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        tree1 = self._check_bound_tree(dir_name1, dir_path1, hashtype, ONE)
        tree2 = self._check_bound_tree(dir_name2, dir_path2, hashtype, ONE)

        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)
        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    # tests of bugs previously found --------------------------------

    def test_gray_boxes_bug1(self):
        """
        Verify that bug #1 in handling serialization of grayboxes website
        has been corrected.
        """
        # NOTE(review): grayboxes.gif sorts before images/, so it looks as
        # if it was meant to be nested under images/ with one more space
        # of indent -- confirm against the original fixture.
        serialization = (
            '721a08022dd26e7be98b723f26131786fd2c0dc3 grayboxes.com/\n'
            ' fcd3973c66230b9078a86a5642b4c359fe72d7da images/\n'
            ' 15e47f4eb55197e1bfffae897e9d5ce4cba49623 grayboxes.gif\n'
            ' 2477b9ea649f3f30c6ed0aebacfa32cb8250f3df index.html\n')

        lines = self._check_round_trips(serialization, HashTypes.SHA1, 4)

        # 2014-06-26 tagged this on here to test first_line_re_1()
        self._check_first_line(lines[0], MerkleTree.first_line_re_1())

    def test_xlattice_bug1(self):
        """
        this test relies on dat.xlattice.org being locally present
        and an internally consistent merkleization
        """
        with open('tests/test_data/dat.xlattice.org', 'rb') as handle:
            serialization = str(handle.read(), 'utf-8')

        self._check_round_trips(serialization, HashTypes.SHA1, 2511)

    def test_gray_boxes_bug3(self):
        """ Test solution to bug in handling grayboxes website. """
        # NOTE(review): as in test_gray_boxes_bug1, the grayboxes.gif line
        # may have been meant to carry one more space of indent -- confirm.
        serialization = (
            '088d0e391e1a4872329e0f7ac5d45b2025363e26c199a7'
            '4ea39901d109afd6ba grayboxes.com/\n'
            ' 24652ddc14687866e6b1251589aee7e1e3079a87f80cd'
            '7775214f6d837612a90 images/\n'
            ' 1eb774eef9be1e696f69a2f95711be37915aac283bb4'
            'b34dcbaf7d032233e090 grayboxes.gif\n'
            ' 6eacebda9fd55b59c0d2e48e2ed59ce9fd683379592f8'
            'e662b1de88e041f53c9 index.html\n')

        lines = self._check_round_trips(serialization, HashTypes.SHA2, 4)

        # 2014-06-26 tagged this on here to test first_line_re_2()
        self._check_first_line(lines[0], MerkleTree.first_line_re_2())

    def test_xlattice_bug3(self):
        """
        this test relies on dat2.xlattice.org being locally present
        and an internally consistent merkleization
        """
        with open('tests/test_data/dat2.xlattice.org', 'rb') as handle:
            serialization = str(handle.read(), 'utf-8')

        self._check_round_trips(serialization, HashTypes.SHA2, 2511)
class TestMerkleTree(unittest.TestCase):
    """
    Test package functionality at the Tree level.

    Generated test directories live under tmp/ and are not removed by
    tearDown.
    """

    def setUp(self):
        # Random names and data come from a generator seeded with the clock.
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions ---------------------------------------------

    def get_two_unique_directory_names(self):
        """ Make two different quasi-random directory names."""
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Make a directory tree with a specific name, depth and width.

        Anything already present at tmp/<name> (file or directory) is
        removed first.  Returns the path to the new directory.
        """
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Create two test directories with different names.

        Returns (dir_name1, dir_path1, dir_name2, dir_path2).
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    @staticmethod
    def _new_hasher(hashtype):
        """
        Return a fresh hash object for hashtype.

        Raises NotImplementedError for any hash type not handled here.
        """
        if hashtype == HashTypes.SHA1:
            return XLSHA1()
        if hashtype == HashTypes.SHA2:
            return XLSHA2()
        if hashtype == HashTypes.SHA3:
            return XLSHA3()
        if hashtype == HashTypes.BLAKE2B:
            return XLBLAKE2B_256()
        raise NotImplementedError

    def verify_leaf_sha(self, node, path_to_file, hashtype):
        """
        Verify a leaf node is hashed correctly, using a specific SHA hash
        type: the digest of the file's bytes must equal node.bin_hash.
        """
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as handle:
            data = handle.read()
        self.assertFalse(data is None)
        sha = self._new_hasher(hashtype)
        sha.update(data)
        hash_ = sha.digest()
        self.assertEqual(hash_, node.bin_hash)

    def verify_tree_sha(self, node, path_to_node, hashtype):
        """
        Verify tree elements are hashed correctly, assuming that the node
        is a MerkleTree, using a specific SHA hash type.

        Children are verified recursively; the tree's own bin_hash must be
        the digest over the bin_hash of every child that has one, or None
        when no child has a hash.
        """
        if node.nodes is None:
            self.assertEqual(None, node.bin_hash)
            return

        hash_count = 0
        sha = self._new_hasher(hashtype)
        for node_ in node.nodes:
            path_to_file = os.path.join(path_to_node, node_.name)
            if isinstance(node_, MerkleLeaf):
                self.verify_leaf_sha(node_, path_to_file, hashtype)
            elif isinstance(node_, MerkleTree):
                self.verify_tree_sha(node_, path_to_file, hashtype)
            else:
                self.fail("unknown node type!")
            if node_.bin_hash is not None:
                hash_count += 1
                sha.update(node_.bin_hash)

        # take care to compare values of the same type: both sides here
        # are binary digests, not hex strings
        if hash_count == 0:
            self.assertEqual(None, node.bin_hash)
        else:
            self.assertEqual(sha.digest(), node.bin_hash)

    def _check_bound_tree(self, dir_name, dir_path, hashtype, node_count):
        """
        Build a MerkleTree from dir_path and check its name, the number of
        top-level nodes, and its hashes.  Returns the tree.
        """
        tree = MerkleTree.create_from_file_system(dir_path, hashtype)
        self.assertEqual(dir_name, tree.name)
        nodes = tree.nodes
        self.assertTrue(nodes is not None)
        self.assertEqual(node_count, len(nodes))
        self.verify_tree_sha(tree, dir_path, hashtype)
        return tree

    def _check_round_trips(self, serialization, hashtype, line_count):
        """
        Rebuild a tree from serialization both as a list of lines and as a
        single string; each must serialize back to the same text and the
        two trees must be equal.  Returns the list of lines.
        """
        # create from string array ----------------------------------
        # drop the empty string split() leaves after the final newline
        lines = serialization.split('\n')
        lines = lines[:-1]
        self.assertEqual(line_count, len(lines))

        tree2 = MerkleTree.create_from_string_array(lines, hashtype)
        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(serialization, hashtype)
        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        self.assertTrue(tree1 == tree2)
        return lines

    def _check_first_line(self, first_line, pattern):
        """
        Check that pattern matches first_line with an empty indent and
        that hash and directory name rejoin to the original line.
        """
        match_ = pattern.match(first_line)
        self.assertTrue(match_ is not None)
        self.assertEqual(match_.group(1), '')               # indent
        tree_hash = match_.group(2)
        dir_name = match_.group(3)
        self.assertEqual(tree_hash + ' ' + dir_name, first_line)

    # unit tests ----------------------------------------------------

    def test_pathless_unbound(self):
        """
        Test basic characteristics of very simple MerkleTrees created
        using our standard SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2,
                      HashTypes.SHA3, HashTypes.BLAKE2B]:
            with self.subTest(hashtype=using):
                self.do_test_pathless_unbound(using)

    def do_test_pathless_unbound(self, hashtype):
        """
        Test basic characteristics of very simple MerkleTrees created
        using a specific SHA hash type.
        """
        (dir_name1, dir_name2) = self.get_two_unique_directory_names()

        check_hashtype(hashtype)
        tree1 = MerkleTree(dir_name1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        if hashtype == HashTypes.SHA1:
            self.assertEqual(SHA1_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.SHA2:
            self.assertEqual(SHA2_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.SHA3:
            self.assertEqual(SHA3_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.BLAKE2B:
            # Same member name that test_pathless_unbound passes in.
            self.assertEqual(BLAKE2B_256_HEX_NONE, tree1.hex_hash)
        else:
            raise NotImplementedError

        tree2 = MerkleTree(dir_name2, hashtype)
        self.assertEqual(dir_name2, tree2.name)

        # these tests remain skimpy
        self.assertFalse(tree1 is None)
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)

        # there should be no indent on the first line
        self.assertFalse(tree1_str[0] == ' ')

        # no extra lines should be added
        lines = tree1_str.split('\n')
        # a trailing line terminator makes split() yield one extra
        # empty string; discard it before counting
        if lines[-1] == '':
            lines = lines[:-1]
        self.assertEqual(1, len(lines))

        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    def test_bound_flat_dirs(self):
        """
        Test handling of flat directories with a few data files using
        various SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            with self.subTest(hashtype=using):
                self.do_test_bound_flat_dirs(using)

    def do_test_bound_flat_dirs(self, hashtype):
        """test directory is single level, with four data files"""
        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(ONE, FOUR)
        tree1 = self._check_bound_tree(dir_name1, dir_path1, hashtype, FOUR)
        tree2 = self._check_bound_tree(dir_name2, dir_path2, hashtype, FOUR)

        self.assertFalse(tree1 is None)
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)
        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    def test_bound_needle_dirs(self):
        """
        Test directories four deep with various SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            with self.subTest(hashtype=using):
                self.do_test_bound_needle_dirs(using)

    def do_test_bound_needle_dirs(self, hashtype):
        """test directories four deep with one data file at the lowest level"""
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        tree1 = self._check_bound_tree(dir_name1, dir_path1, hashtype, ONE)
        tree2 = self._check_bound_tree(dir_name2, dir_path2, hashtype, ONE)

        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)
        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    # tests of bugs previously found --------------------------------

    def test_gray_boxes_bug1(self):
        """
        Verify that bug #1 in handling serialization of grayboxes website
        has been corrected.
        """
        # NOTE(review): grayboxes.gif sorts before images/, so it looks as
        # if it was meant to be nested under images/ with one more space
        # of indent -- confirm against the original fixture.
        serialization = (
            '721a08022dd26e7be98b723f26131786fd2c0dc3 grayboxes.com/\n'
            ' fcd3973c66230b9078a86a5642b4c359fe72d7da images/\n'
            ' 15e47f4eb55197e1bfffae897e9d5ce4cba49623 grayboxes.gif\n'
            ' 2477b9ea649f3f30c6ed0aebacfa32cb8250f3df index.html\n')

        lines = self._check_round_trips(serialization, HashTypes.SHA1, 4)

        # 2014-06-26 tagged this on here to test first_line_re_1()
        self._check_first_line(lines[0], MerkleTree.first_line_re_1())

    def test_xlattice_bug1(self):
        """
        this test relies on dat.xlattice.org being locally present
        and an internally consistent merkleization
        """
        with open('tests/test_data/dat.xlattice.org', 'rb') as handle:
            serialization = str(handle.read(), 'utf-8')

        self._check_round_trips(serialization, HashTypes.SHA1, 2511)

    def test_gray_boxes_bug3(self):
        """ Test solution to bug in handling grayboxes website. """
        # NOTE(review): as in test_gray_boxes_bug1, the grayboxes.gif line
        # may have been meant to carry one more space of indent -- confirm.
        serialization = (
            '088d0e391e1a4872329e0f7ac5d45b2025363e26c199a7'
            '4ea39901d109afd6ba grayboxes.com/\n'
            ' 24652ddc14687866e6b1251589aee7e1e3079a87f80cd'
            '7775214f6d837612a90 images/\n'
            ' 1eb774eef9be1e696f69a2f95711be37915aac283bb4'
            'b34dcbaf7d032233e090 grayboxes.gif\n'
            ' 6eacebda9fd55b59c0d2e48e2ed59ce9fd683379592f8'
            'e662b1de88e041f53c9 index.html\n')

        lines = self._check_round_trips(serialization, HashTypes.SHA2, 4)

        # 2014-06-26 tagged this on here to test first_line_re_2()
        self._check_first_line(lines[0], MerkleTree.first_line_re_2())

    def test_xlattice_bug3(self):
        """
        this test relies on dat2.xlattice.org being locally present
        and an internally consistent merkleization
        """
        with open('tests/test_data/dat2.xlattice.org', 'rb') as handle:
            serialization = str(handle.read(), 'utf-8')

        self._check_round_trips(serialization, HashTypes.SHA2, 2511)
class TestMerkleDoc(unittest.TestCase): """ Test MerkleTree functionality at the document level. """ def setUp(self): self.rng = SimpleRNG(time.time()) def tearDown(self): pass def get_two_unique_directory_names(self): """ Get two candidate directory names, making sure that they differ. """ dir_name1 = self.rng.next_file_name(MAX_NAME_LEN) dir_name2 = dir_name1 while dir_name2 == dir_name1: dir_name2 = self.rng.next_file_name(MAX_NAME_LEN) self.assertTrue(len(dir_name1) > 0) self.assertTrue(len(dir_name2) > 0) self.assertTrue(dir_name1 != dir_name2) return (dir_name1, dir_name2) def make_one_named_test_directory(self, name, depth, width): """ Create a test directory with the name, depth, and width specified. The directory is under tmp/ ; subdirectories have random names and contents. """ dir_path = "tmp/%s" % name if os.path.exists(dir_path): shutil.rmtree(dir_path) self.rng.next_data_dir(dir_path, depth, width, 32) return dir_path def make_two_test_directories(self, depth, width): """ Create two test directories under tmp/ with distinct names but the depth and width specified. """ dir_name1 = self.rng.next_file_name(MAX_NAME_LEN) dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width) dir_name2 = dir_name1 while dir_name2 == dir_name1: dir_name2 = self.rng.next_file_name(MAX_NAME_LEN) dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width) return (dir_name1, dir_path1, dir_name2, dir_path2) def verify_leaf_sha256(self, node, path_to_file): """ Verify that the content keys of the named file match the SHA hash of its contents. 
""" self.assertTrue(os.path.exists(path_to_file)) with open(path_to_file, "rb") as file: data = file.read() self.assertFalse(data is None) sha = hashlib.sha256() sha.update(data) hash_ = sha.digest() self.assertEqual(hash_, node.bin_hash) def verify_tree_sha256(self, node, path_to_tree): """ Verify that the names (content keys) of files below the node (a Merkletree) have correct content keys, matching the SHA hash of the files. """ if node.nodes is None: self.assertEqual(None, node.bin_hash) else: hash_count = 0 sha = hashlib.sha256() for node_ in node.nodes: path_to_node = os.path.join(path_to_tree, node_.name) if isinstance(node_, MerkleLeaf): self.verify_leaf_sha256(node_, path_to_node) elif isinstance(node_, MerkleTree): self.verify_tree_sha256(node_, path_to_node) else: print("DEBUG: unknown node type!") self.fail("unknown node type!") if node_.bin_hash is not None: hash_count += 1 sha.update(node_.bin_hash) if hash_count == 0: self.assertEqual(None, node.bin_hash) else: self.assertEqual(sha.digest(), node.bin_hash) # actual unit tests ############################################# def test_bound_flat_dirs(self): """test directory is single level, with four data files""" dir_name1, dir_path1, dir_name2, dir_path2 = \ self.make_two_test_directories(ONE, FOUR) doc1 = MerkleDoc.create_from_file_system(dir_path1) # pylint: disable=no-member tree1 = doc1.tree # XXX This succeeds BUT pylint doesn't get this right: it sees # doc1.tree as a function self.assertTrue(isinstance(tree1, MerkleTree)) # pylint: disable=no-member self.assertEqual(dir_name1, tree1.name) self.assertTrue(doc1.bound) self.assertEqual(("tmp/%s" % dir_name1), dir_path1) # pylint: disable=no-member nodes1 = tree1.nodes self.assertTrue(nodes1 is not None) self.assertEqual(FOUR, len(nodes1)) self.verify_tree_sha256(tree1, dir_path1) doc2 = MerkleDoc.create_from_file_system(dir_path2) tree2 = doc2.tree # pylint: disable=no-member self.assertEqual(dir_name2, tree2.name) self.assertTrue(doc2.bound) 
self.assertEqual(("tmp/%s" % dir_name2), dir_path2) # pylint: disable=no-member nodes2 = tree2.nodes self.assertTrue(nodes2 is not None) self.assertEqual(FOUR, len(nodes2)) self.verify_tree_sha256(tree2, dir_path2) # pylint: disable=no-member self.assertTrue(tree1.equal(tree1)) # pylint: disable=no-member self.assertFalse(tree1.equal(tree2)) # pylint: disable=no-member self.assertFalse(tree1.equal(None)) doc1_str = doc1.to_string() doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str) self.assertTrue(doc1.equal(doc1_rebuilt)) # MANGO def test_bound_needle_dirs(self): """test directories four deep with one data file at the lowest level""" (dir_name1, dir_path1, dir_name2, dir_path2) =\ self.make_two_test_directories(FOUR, ONE) doc1 = MerkleDoc.create_from_file_system(dir_path1) tree1 = doc1.tree # XXX This succeeds BUT pylint doesn't get this right: it sees # doc1.tree as a function self.assertTrue(isinstance(tree1, MerkleTree)) # pylint: disable=no-member self.assertEqual(dir_name1, tree1.name) self.assertTrue(doc1.bound) self.assertEqual(("tmp/%s" % dir_name1), dir_path1) # pylint: disable=no-member nodes1 = tree1.nodes self.assertTrue(nodes1 is not None) self.assertEqual(ONE, len(nodes1)) self.verify_tree_sha256(tree1, dir_path1) doc2 = MerkleDoc.create_from_file_system(dir_path2) tree2 = doc2.tree # pylint: disable=no-member self.assertEqual(dir_name2, tree2.name) self.assertTrue(doc2.bound) self.assertEqual(("tmp/%s" % dir_name2), dir_path2) # pylint: disable=no-member nodes2 = tree2.nodes self.assertTrue(nodes2 is not None) self.assertEqual(ONE, len(nodes2)) self.verify_tree_sha256(tree2, dir_path2) self.assertTrue(doc1.equal(doc1)) self.assertFalse(doc1.equal(doc2)) doc1_str = doc1.to_string() doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str) # # DEBUG # print "needle doc:\n" + doc1Str # print "rebuilt needle doc:\n" + doc1Rebuilt.toString() # # END self.assertTrue(doc1.equal(doc1_rebuilt)) # FOO
class TestRandomDir(unittest.TestCase):
    """
    Test building quasi-random data files and directory structures.

    For each hash type a random data directory is generated under tmp/,
    then a buffer of random bytes is hashed twice -- once in memory with
    hashlib and once from disk with the matching file-hashing helper --
    and the two hex digests are required to agree.
    """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions #############################################

    def _unique_tmp_path(self):
        """ Return a path under tmp/ at which nothing exists yet. """
        path = os.path.join('tmp', self.rng.next_file_name(8))
        while os.path.exists(path):
            path = os.path.join('tmp', self.rng.next_file_name(8))
        return path

    # actual unit tests #############################################

    def do_test_random_dir(self, hashtype):
        """
        Test building random directories with specific SHA hash type.

        Raises NotImplementedError if no hasher is known for hashtype.
        """
        check_hashtype(hashtype)

        depth = 1 + self.rng.next_int16(3)       # so 1 to 3
        width = 1 + self.rng.next_int16(16)      # so 1 to 16
        blk_count = 1 + self.rng.next_int16(3)   # so 1 to 3
        min_len = 1

        # The last block will usually be only partially populated.  With a
        # single block and a zero draw the raw value would be 0, i.e. less
        # than min_len, so clamp it.  (Assumes next_int16(n) can return 0,
        # as the "so 1 to 3" comments above imply.)
        max_len = max(
            min_len,
            BuildList.BLOCK_SIZE * (blk_count - 1) +
            self.rng.next_int16(BuildList.BLOCK_SIZE))

        # we want the directory name to be unique
        path_to_dir = self._unique_tmp_path()
        self.rng.next_data_dir(path_to_dir, depth, width, max_len, min_len)

        data = bytearray(max_len)       # that many null bytes
        self.rng.next_bytes(data)       # fill with random data

        # Pick the in-memory hasher and the matching file helper together
        # so the two can never get out of step.
        if hashtype == HashTypes.SHA1:
            sha, file_hasher = hashlib.sha1(), file_sha1hex
        elif hashtype == HashTypes.SHA2:
            sha, file_hasher = hashlib.sha256(), file_sha2hex
        elif hashtype == HashTypes.SHA3:
            # pylint:disable=no-member
            sha, file_hasher = hashlib.sha3_256(), file_sha3hex
        elif hashtype == HashTypes.BLAKE2B:
            sha, file_hasher = \
                hashlib.blake2b(digest_size=32), file_blake2b_hex
        else:
            raise NotImplementedError
        sha.update(data)
        hash_ = sha.hexdigest()

        path_to_file = self._unique_tmp_path()
        with open(path_to_file, 'wb') as file:
            file.write(data)

        self.assertEqual(hash_, file_hasher(path_to_file))

    def test_random_dir(self):
        """ Test building random directories with supported SHA hash types. """
        for hashtype in HashTypes:
            # subTest makes a failure report name the offending hash type
            with self.subTest(hashtype=hashtype):
                self.do_test_random_dir(hashtype)
class TestMerkleDoc(unittest.TestCase):
    """
    Test MerkleTree functionality at the document level.

    Each test creates two quasi-random directories under tmp/, builds a
    MerkleDoc from each, checks the resulting trees against hashes
    recomputed from the files on disk with XLSHA2, and round-trips one
    document through its serialization.
    """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions #############################################

    def get_two_unique_directory_names(self):
        """ Get two candidate directory names, making sure that they differ. """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Create a test directory with the name, depth, and width specified.

        The directory is under tmp/ ; subdirectories have random names and
        contents.  Anything already present at that path is removed first.
        Returns the path to the new directory.
        """
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            # shutil.rmtree() raises on a regular file, so a leftover file
            # with the same name has to be unlinked instead.
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Create two test directories under tmp/ with distinct names but
        the depth and width specified.

        Returns (dir_name1, dir_path1, dir_name2, dir_path2).
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    def verify_leaf_sha256(self, node, path_to_file):
        """
        Verify that the content keys of the named file match the SHA
        hash of its contents.
        """
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertIsNotNone(data)
        sha = XLSHA2()
        sha.update(data)
        self.assertEqual(sha.digest(), node.bin_hash)

    def verify_tree_sha256(self, node, path_to_tree):
        """
        Verify that the names (content keys) of files below the node
        (a Merkletree) have correct content keys, matching the SHA hash
        of the files.

        The expected hash of a tree is the hash over the bin_hash values
        of its children, in order, skipping children whose bin_hash is
        None; a tree with no hashed children must itself have no hash.
        """
        if node.nodes is None:
            self.assertIsNone(node.bin_hash)
            return

        hash_count = 0
        sha = XLSHA2()
        for node_ in node.nodes:
            path_to_node = os.path.join(path_to_tree, node_.name)
            if isinstance(node_, MerkleLeaf):
                self.verify_leaf_sha256(node_, path_to_node)
            elif isinstance(node_, MerkleTree):
                self.verify_tree_sha256(node_, path_to_node)
            else:
                self.fail("unknown node type!")
            if node_.bin_hash is not None:
                hash_count += 1
                sha.update(node_.bin_hash)

        if hash_count == 0:
            self.assertIsNone(node.bin_hash)
        else:
            self.assertEqual(sha.digest(), node.bin_hash)

    def _check_bound_doc(self, dir_name, dir_path, node_count):
        """
        Build a MerkleDoc from the directory at dir_path and verify it.

        Checks that the document is bound, that its tree is a MerkleTree
        named dir_name with node_count top-level nodes, and that the tree
        matches the files on disk.  Returns the MerkleDoc.
        """
        # pylint: disable=no-member
        doc = MerkleDoc.create_from_file_system(dir_path)
        tree = doc.tree
        self.assertIsInstance(tree, MerkleTree)
        self.assertEqual(dir_name, tree.name)
        self.assertTrue(doc.bound)
        self.assertEqual(("tmp/%s" % dir_name), dir_path)
        nodes = tree.nodes
        self.assertIsNotNone(nodes)
        self.assertEqual(node_count, len(nodes))
        self.verify_tree_sha256(tree, dir_path)
        return doc

    # actual unit tests #############################################

    def test_bound_flat_dirs(self):
        """test directory is single level, with four data files"""
        dir_name1, dir_path1, dir_name2, dir_path2 = \
            self.make_two_test_directories(ONE, FOUR)

        doc1 = self._check_bound_doc(dir_name1, dir_path1, FOUR)
        doc2 = self._check_bound_doc(dir_name2, dir_path2, FOUR)

        # pylint: disable=no-member
        tree1 = doc1.tree
        tree2 = doc2.tree
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)
        # An identity test against None could never fail here (tree1 has
        # already been dereferenced), so exercise __eq__ with None instead.
        # pylint: disable=singleton-comparison
        self.assertFalse(tree1 == None)     # noqa: E711

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str)
        self.assertTrue(doc1 == doc1_rebuilt)

    def test_bound_needle_dirs(self):
        """test directories four deep with one data file at the lowest level"""
        (dir_name1, dir_path1, dir_name2, dir_path2) = \
            self.make_two_test_directories(FOUR, ONE)

        doc1 = self._check_bound_doc(dir_name1, dir_path1, ONE)
        doc2 = self._check_bound_doc(dir_name2, dir_path2, ONE)

        self.assertTrue(doc1 == doc1)
        self.assertFalse(doc1 == doc2)

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str)
        self.assertTrue(doc1 == doc1_rebuilt)
class TestMerkleDoc(unittest.TestCase):
    """
    Test Merkletree functionality at the Document level.

    Every test is run once per member of HashTypes.  Each run creates two
    quasi-random directories under tmp/, builds a MerkleDoc from each,
    checks the trees against hashes recomputed from disk, and round-trips
    one document through its serialization.
    """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions #############################################

    def get_two_unique_directory_names(self):
        """ Generate two quasi-random directory names. """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Create a directory with the given name under tmp/, removing any
        existing file or directory of that name, and fill it with random
        data of the depth and width specified.

        Returns the path to the new directory.
        """
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Generate two different names, using them to create subdirectories
        of tmp/.

        Returns (dir_name1, dir_path1, dir_name2, dir_path2).
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    @staticmethod
    def _new_hasher(hashtype):
        """
        Return a fresh hash object for hashtype.

        Raises NotImplementedError for a hash type with no known hasher.
        """
        # pylint: disable=no-member
        if hashtype == HashTypes.SHA1:
            return XLSHA1()
        if hashtype == HashTypes.SHA2:
            return XLSHA2()
        if hashtype == HashTypes.SHA3:
            return XLSHA3()
        if hashtype == HashTypes.BLAKE2B_256:
            return XLBLAKE2B_256()
        raise NotImplementedError

    def verify_leaf_hash(self, node, path_to_file, hashtype):
        """
        Verify that a MerkleLeaf correctly describes a file, given a hash
        type.
        """
        check_hashtype(hashtype)
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertIsNotNone(data)
        sha = self._new_hasher(hashtype)
        sha.update(data)
        self.assertEqual(sha.digest(), node.bin_hash)

    def verify_tree_hash(self, node, path_to_tree, hashtype):
        """
        Given a MerkleTree, verify that it correctly describes the
        directory whose path is passed.

        The expected hash of a tree is the hash over the bin_hash values
        of its children, in order, skipping children whose bin_hash is
        None; a tree with no hashed children must itself have no hash.
        """
        # we assume that the node is a MerkleTree
        check_hashtype(hashtype)
        if node.nodes is None:
            self.assertIsNone(node.bin_hash)
            return

        hash_count = 0
        sha = self._new_hasher(hashtype)
        for node_ in node.nodes:
            path_to_node = os.path.join(path_to_tree, node_.name)
            if isinstance(node_, MerkleLeaf):
                self.verify_leaf_hash(node_, path_to_node, hashtype)
            elif isinstance(node_, MerkleTree):
                self.verify_tree_hash(node_, path_to_node, hashtype)
            else:
                self.fail("unknown node type!")
            if node_.bin_hash is not None:
                hash_count += 1
                sha.update(node_.bin_hash)

        if hash_count == 0:
            self.assertIsNone(node.bin_hash)
        else:
            self.assertEqual(sha.digest(), node.bin_hash)

    def _check_bound_doc(self, dir_name, dir_path, node_count, hashtype):
        """
        Build a MerkleDoc from the directory at dir_path and verify it.

        Checks that the document is bound, that its tree is a MerkleTree
        named dir_name with node_count top-level nodes, and that the tree
        matches the files on disk.  Returns the MerkleDoc.
        """
        # pylint: disable=no-member
        doc = MerkleDoc.create_from_file_system(dir_path, hashtype)
        tree = doc.tree
        self.assertIsInstance(tree, MerkleTree)
        self.assertEqual(dir_name, tree.name)
        self.assertTrue(doc.bound)
        self.assertEqual(("tmp/%s" % dir_name), dir_path)
        nodes = tree.nodes
        self.assertIsNotNone(nodes)
        self.assertEqual(node_count, len(nodes))
        self.verify_tree_hash(tree, dir_path, hashtype)
        return doc

    # actual unit tests #############################################

    def test_bound_flat_dirs(self):
        """test directory is single level, with four data files"""
        for hashtype in HashTypes:
            # subTest makes a failure report name the offending hash type
            with self.subTest(hashtype=hashtype):
                self.do_test_bound_flat_dirs(hashtype)

    def do_test_bound_flat_dirs(self, hashtype):
        """ Test two flat directories with the specified hash type. """
        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) = \
            self.make_two_test_directories(ONE, FOUR)

        doc1 = self._check_bound_doc(dir_name1, dir_path1, FOUR, hashtype)
        doc2 = self._check_bound_doc(dir_name2, dir_path2, FOUR, hashtype)

        # pylint: disable=no-member
        tree1 = doc1.tree
        tree2 = doc2.tree
        self.assertEqual(tree1, tree1)
        self.assertFalse(tree1 == tree2)
        # An identity test against None could never fail here (tree1 has
        # already been dereferenced), so exercise __eq__ with None instead.
        # pylint: disable=singleton-comparison
        self.assertFalse(tree1 == None)     # noqa: E711

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str, hashtype)
        self.assertTrue(doc1 == doc1_rebuilt)

    def test_bound_needle_dirs(self):
        """test directories four deep with one data file at the lowest level"""
        for hashtype in HashTypes:
            with self.subTest(hashtype=hashtype):
                self.do_test_bound_needle_dirs(hashtype)

    def do_test_bound_needle_dirs(self, hashtype):
        """ Run tests on two deeper directories. """
        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) = \
            self.make_two_test_directories(FOUR, ONE)

        doc1 = self._check_bound_doc(dir_name1, dir_path1, ONE, hashtype)
        doc2 = self._check_bound_doc(dir_name2, dir_path2, ONE, hashtype)

        self.assertTrue(doc1 == doc1)
        self.assertFalse(doc1 == doc2)

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str, hashtype)
        self.assertTrue(doc1 == doc1_rebuilt)
class TestMerkleDoc(unittest.TestCase):
    """
    Test MerkleDoc behavior using SHA1, SHA2 and SHA3 hashes.

    Each run creates two quasi-random directories under tmp/, builds a
    MerkleDoc from each, checks the trees against hashlib digests
    recomputed from disk, and round-trips one document through its
    serialization.
    """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions #############################################

    def get_two_unique_directory_names(self):
        """ Return two quasi-random directory names which differ. """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Create a directory with the given name under tmp/ and fill it with
        random data of the depth and width specified.

        Anything already present at that path is removed first.  Returns
        the path to the new directory.
        """
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            # shutil.rmtree() raises on a regular file, so a leftover file
            # with the same name has to be unlinked instead.
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Create two test directories under tmp/ with distinct names and the
        depth and width specified.

        Returns (dir_name1, dir_path1, dir_name2, dir_path2).
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    @staticmethod
    def _new_hasher(hashtype):
        """
        Return a fresh hashlib object for hashtype.

        Raises NotImplementedError for any other hash type, rather than
        leaving the caller with an unbound hasher.
        """
        if hashtype == HashTypes.SHA1:
            return hashlib.sha1()
        if hashtype == HashTypes.SHA2:
            return hashlib.sha256()
        if hashtype == HashTypes.SHA3:
            # pylint: disable=no-member
            return hashlib.sha3_256()
        raise NotImplementedError

    def verify_leaf_sha(self, node, path_to_file, hashtype):
        """
        Verify that the bin_hash of a leaf node equals the hash of the
        contents of the file at path_to_file.
        """
        check_hashtype(hashtype)
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertIsNotNone(data)
        sha = self._new_hasher(hashtype)
        sha.update(data)
        self.assertEqual(sha.digest(), node.bin_hash)

    def verify_tree_sha(self, node, path_to_tree, hashtype):
        """
        Verify, recursively, that a tree node matches the directory at
        path_to_tree.

        The expected hash of a tree is the hash over the bin_hash values
        of its children, in order, skipping children whose bin_hash is
        None; a tree with no hashed children must itself have no hash.
        """
        # we assume that the node is a MerkleTree
        check_hashtype(hashtype)
        if node.nodes is None:
            self.assertIsNone(node.bin_hash)
            return

        hash_count = 0
        sha = self._new_hasher(hashtype)
        for node_ in node.nodes:
            path_to_node = os.path.join(path_to_tree, node_.name)
            if isinstance(node_, MerkleLeaf):
                self.verify_leaf_sha(node_, path_to_node, hashtype)
            elif isinstance(node_, MerkleTree):
                self.verify_tree_sha(node_, path_to_node, hashtype)
            else:
                self.fail("unknown node type!")
            if node_.bin_hash is not None:
                hash_count += 1
                sha.update(node_.bin_hash)

        if hash_count == 0:
            self.assertIsNone(node.bin_hash)
        else:
            self.assertEqual(sha.digest(), node.bin_hash)

    def _check_bound_doc(self, dir_name, dir_path, node_count, hashtype):
        """
        Build a MerkleDoc from the directory at dir_path and verify it.

        Checks that the document is bound, that its tree is a MerkleTree
        named dir_name with node_count top-level nodes, and that the tree
        matches the files on disk.  Returns the MerkleDoc.
        """
        # pylint: disable=no-member
        doc = MerkleDoc.create_from_file_system(dir_path, hashtype)
        tree = doc.tree
        self.assertIsInstance(tree, MerkleTree)
        self.assertEqual(dir_name, tree.name)
        self.assertTrue(doc.bound)
        self.assertEqual(("tmp/%s" % dir_name), dir_path)
        nodes = tree.nodes
        self.assertIsNotNone(nodes)
        self.assertEqual(node_count, len(nodes))
        self.verify_tree_sha(tree, dir_path, hashtype)
        return doc

    # actual unit tests #############################################

    def test_bound_flat_dirs(self):
        """test directory is single level, with four data files"""
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            # subTest makes a failure report name the offending hash type
            with self.subTest(hashtype=using):
                self.do_test_bound_flat_dirs(using)

    def do_test_bound_flat_dirs(self, hashtype):
        """ Test two single-level directories with the given hash type. """
        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) = \
            self.make_two_test_directories(ONE, FOUR)

        doc1 = self._check_bound_doc(dir_name1, dir_path1, FOUR, hashtype)
        doc2 = self._check_bound_doc(dir_name2, dir_path2, FOUR, hashtype)

        # pylint: disable=no-member
        tree1 = doc1.tree
        tree2 = doc2.tree
        self.assertEqual(tree1, tree1)
        self.assertFalse(tree1 == tree2)
        # An identity test against None could never fail here (tree1 has
        # already been dereferenced), so exercise __eq__ with None instead.
        # pylint: disable=singleton-comparison
        self.assertFalse(tree1 == None)     # noqa: E711

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str, hashtype)
        self.assertTrue(doc1.equal(doc1_rebuilt))

    def test_bound_needle_dirs(self):
        """test directories four deep with one data file at the lowest level"""
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            with self.subTest(hashtype=using):
                self.do_test_bound_needle_dirs(using)

    def do_test_bound_needle_dirs(self, hashtype):
        """ Test two four-deep directories with the given hash type. """
        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) = \
            self.make_two_test_directories(FOUR, ONE)

        doc1 = self._check_bound_doc(dir_name1, dir_path1, ONE, hashtype)
        doc2 = self._check_bound_doc(dir_name2, dir_path2, ONE, hashtype)

        self.assertTrue(doc1.equal(doc1))
        self.assertFalse(doc1.equal(doc2))

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str, hashtype)
        self.assertTrue(doc1.equal(doc1_rebuilt))
class TestNLHTree2(unittest.TestCase):
    """
    Test trees derived from various quasi-random directory structures.

    Covers NLHTrees built directly from a name (unbound) and NLHTrees
    built from directories created under tmp/, checking names, top-level
    node counts, equality, and cloning.
    """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions ---------------------------------------------

    def get_two_unique_directory_names(self):
        """ Make two unique directory names. """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Create a test directory below tmp/ with specified characteristics.

        Anything already present at that path is removed first.  Returns
        the path to the new directory.
        """
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            # shutil.rmtree() raises on a regular file, so a leftover file
            # with the same name has to be unlinked instead.
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Make two distinct quasi-random test directories below tmp/.

        Returns (dir_name1, dir_path1, dir_name2, dir_path2).
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    def _check_tree_relations(self, tree1, tree2):
        """
        Verify that tree1 equals itself and its clone, and differs from
        both tree2 and None.
        """
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)
        # An identity test against None could never fail for a tree that
        # has already been dereferenced, so exercise __eq__ with None.
        # pylint: disable=singleton-comparison
        self.assertFalse(tree1 == None)     # noqa: E711
        tree1c = tree1.clone()
        self.assertEqual(tree1c, tree1)

    def _do_test_bound_dirs(self, depth, width, node_count, hashtype):
        """
        Build two directories of the given depth and width, create an
        NLHTree from each, and verify names, top-level node counts,
        equality and cloning.
        """
        (dir_name1, dir_path1, dir_name2, dir_path2) = \
            self.make_two_test_directories(depth, width)

        tree1 = NLHTree.create_from_file_system(dir_path1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        nodes1 = tree1.nodes
        self.assertIsNotNone(nodes1)
        self.assertEqual(node_count, len(nodes1))

        tree2 = NLHTree.create_from_file_system(dir_path2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        nodes2 = tree2.nodes
        self.assertIsNotNone(nodes2)
        self.assertEqual(node_count, len(nodes2))

        self._check_tree_relations(tree1, tree2)

    # unit tests ----------------------------------------------------

    def test_pathless_unbound(self):
        """ Test the constructor using various hash types. """
        for hashtype in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            # subTest makes a failure report name the offending hash type
            with self.subTest(hashtype=hashtype):
                self.do_test_pathless_unbound(hashtype)

    def do_test_pathless_unbound(self, hashtype):
        """
        Test constructor using two directories and a specific hash type.
        """
        (dir_name1, dir_name2) = self.get_two_unique_directory_names()

        check_hashtype(hashtype)
        tree1 = NLHTree(dir_name1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        self.assertEqual(tree1.hashtype, hashtype)

        tree2 = NLHTree(dir_name2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        self.assertEqual(tree2.hashtype, hashtype)

        self._check_tree_relations(tree1, tree2)

    def test_bound_flat_dirs(self):
        """
        Test directory is single level, with four data files, using
        various hash types.
        """
        for hashtype in HashTypes:
            with self.subTest(hashtype=hashtype):
                self.do_test_bound_flat_dirs(hashtype)

    def do_test_bound_flat_dirs(self, hashtype):
        """
        Test directory is single level, with four data files, using
        specific hash type.
        """
        self._do_test_bound_dirs(ONE, FOUR, FOUR, hashtype)

    def test_bound_needle_dirs1(self):
        """
        Test directories four deep with one data file at the lowest level
        using various hash types.
        """
        for hashtype in HashTypes:
            with self.subTest(hashtype=hashtype):
                self.do_test_bound_needle_dirs(hashtype)

    def do_test_bound_needle_dirs(self, hashtype):
        """
        Test directories four deep with one data file at the lowest level
        using specific hash type.
        """
        self._do_test_bound_dirs(FOUR, ONE, ONE, hashtype)