Esempio n. 1
0
class TestMerkleTree2(unittest.TestCase):
    """ Test MerkleTree behavior with deeper directories. """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    def do_test_deepish_trees(self, hashtype):
        """
        Build a directory of random data, then its MerkleTree, then
        round trip to a serialization and back.
        """

        tree_top = os.path.join('tmp', self.rng.next_file_name(MAX_NAME_LEN))
        while os.path.exists(tree_top):
            tree_top = os.path.join(
                'tmp', self.rng.next_file_name(MAX_NAME_LEN))

        # Generate a quasi-random data directory, 7 deep, up to 5 files/dir
        self.rng.next_data_dir(tree_top, depth=7, width=5, max_len=4096)

        # Build a MerkleTree specifying the directory.
        tree = MerkleTree.create_from_file_system(tree_top, hashtype)

        # ROUND TRIP 1 ----------------------------------------------

        # Serialize it.
        ser = tree.__str__()

        # Deserialize to make another MerkleTree.
        tree2 = MerkleTree.create_from_serialization(ser, hashtype)

        self.assertTrue(tree2.__eq__(tree))
        self.assertEqual(tree2, tree)           # identical test

        # ROUND TRIP 2 ----------------------------------------------
        strings = ser.split('\n')
        strings = strings[:-1]
        tree3 = MerkleTree.create_from_string_array(strings, hashtype)
        self.assertEqual(tree3, tree)

        # ROUND TRIP 3 ----------------------------------------------
        filename = os.path.join('tmp', self.rng.next_file_name(8))
        while os.path.exists(filename):
            filename = os.path.join('tmp', self.rng.next_file_name(8))
        with open(filename, 'w') as file:
            file.write(ser)

        tree4 = MerkleTree.create_from_file(filename, hashtype)
        self.assertEqual(tree4, tree)

    def test_deepish_trees(self):
        """ Test behavior of deeper trees using various SHA hash types. """

        for hashtype in HashTypes:
            self.do_test_deepish_trees(hashtype)
Esempio n. 2
0
class TestMerkleTree2(unittest.TestCase):
    """ Test MerkleTree behavior with deeper directories. """
    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    def do_test_deepish_trees(self, hashtype):
        """
        Build a directory of random data, then its MerkleTree, then
        round trip to a serialization and back.
        """

        tree_top = os.path.join('tmp', self.rng.next_file_name(MAX_NAME_LEN))
        while os.path.exists(tree_top):
            tree_top = os.path.join('tmp',
                                    self.rng.next_file_name(MAX_NAME_LEN))

        # Generate a quasi-random data directory, 7 deep, up to 5 files/dir
        self.rng.next_data_dir(tree_top, depth=7, width=5, max_len=4096)

        # Build a MerkleTree specifying the directory.
        tree = MerkleTree.create_from_file_system(tree_top, hashtype)

        # ROUND TRIP 1 ----------------------------------------------

        # Serialize it.
        ser = tree.__str__()

        # Deserialize to make another MerkleTree.
        tree2 = MerkleTree.create_from_serialization(ser, hashtype)

        self.assertTrue(tree2.__eq__(tree))
        self.assertEqual(tree2, tree)  # identical test

        # ROUND TRIP 2 ----------------------------------------------
        strings = ser.split('\n')
        strings = strings[:-1]
        tree3 = MerkleTree.create_from_string_array(strings, hashtype)
        self.assertEqual(tree3, tree)

        # ROUND TRIP 3 ----------------------------------------------
        filename = os.path.join('tmp', self.rng.next_file_name(8))
        while os.path.exists(filename):
            filename = os.path.join('tmp', self.rng.next_file_name(8))
        with open(filename, 'w') as file:
            file.write(ser)

        tree4 = MerkleTree.create_from_file(filename, hashtype)
        self.assertEqual(tree4, tree)

    def test_deepish_trees(self):
        """ Test behavior of deeper trees using various SHA hash types. """

        for hashtype in HashTypes:
            self.do_test_deepish_trees(hashtype)
Esempio n. 3
0
class TestMerkleTree(unittest.TestCase):
    """ Test package functionality at the Tree level. """
    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions ---------------------------------------------

    def get_two_unique_directory_names(self):
        """ Make two different quasi-random directory names."""
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """ Make a directory tree with a specific name, depth and width."""
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """ Create two test directories with different names. """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    def verify_leaf_sha(self, node, path_to_file, hashtype):
        """
        Verify a leaf node is hashed correctly, using a specific SHA hash type.
        """
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertFalse(data is None)
        if hashtype == HashTypes.SHA1:
            sha = XLSHA1()
        elif hashtype == HashTypes.SHA2:
            sha = XLSHA2()
        elif hashtype == HashTypes.SHA3:
            sha = XLSHA3()
        elif hashtype == HashTypes.BLAKE2B:
            sha = XLBLAKE2B_256()
        else:
            raise NotImplementedError
        sha.update(data)
        hash_ = sha.digest()
        self.assertEqual(hash_, node.bin_hash)

    def verify_tree_sha(self, node, path_to_node, hashtype):
        """
        Verify tree elements are hashed correctly, assuming that the node
        is a MerkleTree, using a specific SHA hash type.
        """
        if node.nodes is None:
            self.assertEqual(None, node.bin_hash)
        else:
            hash_count = 0
            if hashtype == HashTypes.SHA1:
                sha = XLSHA1()
            elif hashtype == HashTypes.SHA2:
                sha = XLSHA2()
            elif hashtype == HashTypes.SHA3:
                sha = XLSHA3()
            elif hashtype == HashTypes.BLAKE2B:
                sha = XLBLAKE2B_256()
            else:
                raise NotImplementedError
            for node_ in node.nodes:
                path_to_file = os.path.join(path_to_node, node_.name)
                if isinstance(node_, MerkleLeaf):
                    self.verify_leaf_sha(node_, path_to_file, hashtype)
                elif isinstance(node_, MerkleTree):
                    self.verify_tree_sha(node_, path_to_file, hashtype)
                else:
                    self.fail("unknown node type!")
                if node_.bin_hash is not None:
                    hash_count += 1
                    sha.update(node_.bin_hash)

            # take care to compare values of the same type;
            # node.binHash is binary, node.hexHash is hex
            if hash_count == 0:
                self.assertEqual(None, node.bin_hash)
            else:
                self.assertEqual(sha.digest(), node.bin_hash)

    # unit tests ----------------------------------------------------

    def test_pathless_unbound(self):
        """
        Test basic characteristics of very simple MerkleTrees created
        using our standard SHA hash types.
        """
        for using in [
                HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3,
                HashTypes.BLAKE2B
        ]:
            self.do_test_pathless_unbound(using)

    def do_test_pathless_unbound(self, hashtype):
        """
        Test basic characteristics of very simple MerkleTrees created
        using a specific SHA hash type.
        """
        (dir_name1, dir_name2) = self.get_two_unique_directory_names()

        check_hashtype(hashtype)
        tree1 = MerkleTree(dir_name1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        if hashtype == HashTypes.SHA1:
            self.assertEqual(SHA1_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.SHA2:
            self.assertEqual(SHA2_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.SHA3:
            self.assertEqual(SHA3_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.BLAKE2B_256:
            self.assertEqual(BLAKE2B_256_HEX_NONE, tree1.hex_hash)
        else:
            raise NotImplementedError
        tree2 = MerkleTree(dir_name2, hashtype)
        self.assertEqual(dir_name2, tree2.name)

        # these tests remain skimpy
        self.assertFalse(tree1 is None)
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)

        # there should be no indent on the first line
        self.assertFalse(tree1_str[0] == ' ')

        # no extra lines should be added
        lines = tree1_str.split('\n')
        # this split generates an extra blank line, because the serialization
        # ends with CR-LF
        if lines[-1] == '':
            lines = lines[:-1]
        self.assertEqual(1, len(lines))

        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    def test_bound_flat_dirs(self):
        """
        Test handling of flat directories with a few data files
        using varioush SHA hash types.
        """
        for using in [
                HashTypes.SHA1,
                HashTypes.SHA2,
                HashTypes.SHA3,
        ]:
            self.do_test_bound_flat_dirs(using)

    def do_test_bound_flat_dirs(self, hashtype):
        """test directory is single level, with four data files"""

        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(ONE, FOUR)
        tree1 = MerkleTree.create_from_file_system(dir_path1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(FOUR, len(nodes1))
        self.verify_tree_sha(tree1, dir_path1, hashtype)

        tree2 = MerkleTree.create_from_file_system(dir_path2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(FOUR, len(nodes2))
        self.verify_tree_sha(tree2, dir_path2, hashtype)

        self.assertFalse(tree1 is None)
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)
        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    def test_bound_needle_dirs(self):
        """
        Test directories four deep with various SHA hash types.
        """
        for using in [
                HashTypes.SHA1,
                HashTypes.SHA2,
                HashTypes.SHA3,
        ]:
            self.do_test_bound_needle_dirs(using)

    def do_test_bound_needle_dirs(self, hashtype):
        """test directories four deep with one data file at the lowest level"""
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        tree1 = MerkleTree.create_from_file_system(dir_path1, hashtype)

        self.assertEqual(dir_name1, tree1.name)
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(ONE, len(nodes1))
        self.verify_tree_sha(tree1, dir_path1, hashtype)

        tree2 = MerkleTree.create_from_file_system(dir_path2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(ONE, len(nodes2))
        self.verify_tree_sha(tree2, dir_path2, hashtype)

        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)
        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        #       # DEBUG
        #       print "NEEDLEDIR TREE1:\n" + tree1Str
        #       print "REBUILT TREE1:\n" + tree1Rebuilt.toString("")
        #       # END
        self.assertTrue(tree1 == tree1_rebuilt)

    # tests of bugs previously found --------------------------------

    def test_gray_boxes_bug1(self):
        """
        Verify that bug #1 in handling serialization of grayboxes website
        has been corrected.
        """
        serialization =\
            '721a08022dd26e7be98b723f26131786fd2c0dc3 grayboxes.com/\n' +\
            ' fcd3973c66230b9078a86a5642b4c359fe72d7da images/\n' +\
            '  15e47f4eb55197e1bfffae897e9d5ce4cba49623 grayboxes.gif\n' +\
            ' 2477b9ea649f3f30c6ed0aebacfa32cb8250f3df index.html\n'

        # create from string array ----------------------------------
        string = serialization.split('\n')
        string = string[:-1]
        self.assertEqual(4, len(string))

        tree2 = MerkleTree.create_from_string_array(string, HashTypes.SHA1)

        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(serialization,
                                                     HashTypes.SHA1)

        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        self.assertTrue(tree1 == tree2)

        # 2014-06-26 tagged this on here to test firstLineRE_1()
        first_line = string[0]
        match_ = MerkleTree.first_line_re_1().match(first_line)
        self.assertTrue(match_ is not None)
        self.assertEqual(match_.group(1), '')  # indent
        tree_hash = match_.group(2)
        dir_name = match_.group(3)
        self.assertEqual(tree_hash + ' ' + dir_name, first_line)

    def test_xlattice_bug1(self):
        """
        this test relies on dat.xlattice.org being locally present
        and an internally consistent merkleization
        """
        with open('tests/test_data/dat.xlattice.org', 'rb') as file:
            serialization = str(file.read(), 'utf-8')

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(serialization,
                                                     HashTypes.SHA1)

        #       # DEBUG
        #       print "tree1 has %d nodes" % len(tree1.nodes)
        #       with open('junk.tree1', 'w') as t:
        #           t.write( tree1.toString(0) )
        #       # END

        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        # create from string array ----------------------------------
        string = serialization.split('\n')
        string = string[:-1]
        self.assertEqual(2511, len(string))

        tree2 = MerkleTree.create_from_string_array(string, HashTypes.SHA1)

        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        self.assertTrue(tree1 == tree2)

    def test_gray_boxes_bug3(self):
        """ Test solution to bug in handling grayboxes website. """

        serialization =\
            '088d0e391e1a4872329e0f7ac5d45b2025363e26c199a7' + \
            '4ea39901d109afd6ba grayboxes.com/\n' +\
            ' 24652ddc14687866e6b1251589aee7e1e3079a87f80cd' + \
            '7775214f6d837612a90 images/\n' +\
            '  1eb774eef9be1e696f69a2f95711be37915aac283bb4' + \
            'b34dcbaf7d032233e090 grayboxes.gif\n' +\
            ' 6eacebda9fd55b59c0d2e48e2ed59ce9fd683379592f8' + \
            'e662b1de88e041f53c9 index.html\n'

        # create from string array ----------------------------------
        string = serialization.split('\n')
        string = string[:-1]
        self.assertEqual(4, len(string))

        tree2 = MerkleTree.create_from_string_array(string, HashTypes.SHA2)

        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(serialization,
                                                     HashTypes.SHA2)

        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        self.assertTrue(tree1 == tree2)

        # 2014-06-26 tagged this on here to test firstLineRE_1()
        first_line = string[0]
        match_ = MerkleTree.first_line_re_2().match(first_line)
        self.assertTrue(match_ is not None)
        self.assertEqual(match_.group(1), '')  # indent
        tree_hash = match_.group(2)
        dir_name = match_.group(3)
        self.assertEqual(tree_hash + ' ' + dir_name, first_line)

    def test_xlattice_bug3(self):
        """
        this test relies on dat2.xlattice.org being locally present
        and an internally consistent merkleization
        """
        with open('tests/test_data/dat2.xlattice.org', 'rb') as file:
            serialization = str(file.read(), 'utf-8')

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(serialization,
                                                     HashTypes.SHA2)

        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        # create from string array ----------------------------------
        string = serialization.split('\n')
        string = string[:-1]
        self.assertEqual(2511, len(string))

        tree2 = MerkleTree.create_from_string_array(string, HashTypes.SHA2)

        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        self.assertTrue(tree1 == tree2)
Esempio n. 4
0
class TestMerkleTree(unittest.TestCase):
    """ Test package functionality at the Tree level. """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions ---------------------------------------------

    def get_two_unique_directory_names(self):
        """ Make two different quasi-random directory names."""
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """ Make a directory tree with a specific name, depth and width."""
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """ Create two test directories with different names. """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    def verify_leaf_sha(self, node, path_to_file, hashtype):
        """
        Verify a leaf node is hashed correctly, using a specific SHA hash type.
        """
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertFalse(data is None)
        if hashtype == HashTypes.SHA1:
            sha = XLSHA1()
        elif hashtype == HashTypes.SHA2:
            sha = XLSHA2()
        elif hashtype == HashTypes.SHA3:
            sha = XLSHA3()
        elif hashtype == HashTypes.BLAKE2B:
            sha = XLBLAKE2B_256()
        else:
            raise NotImplementedError
        sha.update(data)
        hash_ = sha.digest()
        self.assertEqual(hash_, node.bin_hash)

    def verify_tree_sha(self, node, path_to_node, hashtype):
        """
        Verify tree elements are hashed correctly, assuming that the node
        is a MerkleTree, using a specific SHA hash type.
        """
        if node.nodes is None:
            self.assertEqual(None, node.bin_hash)
        else:
            hash_count = 0
            if hashtype == HashTypes.SHA1:
                sha = XLSHA1()
            elif hashtype == HashTypes.SHA2:
                sha = XLSHA2()
            elif hashtype == HashTypes.SHA3:
                sha = XLSHA3()
            elif hashtype == HashTypes.BLAKE2B:
                sha = XLBLAKE2B_256()
            else:
                raise NotImplementedError
            for node_ in node.nodes:
                path_to_file = os.path.join(path_to_node, node_.name)
                if isinstance(node_, MerkleLeaf):
                    self.verify_leaf_sha(node_, path_to_file, hashtype)
                elif isinstance(node_, MerkleTree):
                    self.verify_tree_sha(node_, path_to_file, hashtype)
                else:
                    self.fail("unknown node type!")
                if node_.bin_hash is not None:
                    hash_count += 1
                    sha.update(node_.bin_hash)

            # take care to compare values of the same type;
            # node.binHash is binary, node.hexHash is hex
            if hash_count == 0:
                self.assertEqual(None, node.bin_hash)
            else:
                self.assertEqual(sha.digest(), node.bin_hash)

    # unit tests ----------------------------------------------------

    def test_pathless_unbound(self):
        """
        Test basic characteristics of very simple MerkleTrees created
        using our standard SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2,
                      HashTypes.SHA3, HashTypes.BLAKE2B]:
            self.do_test_pathless_unbound(using)

    def do_test_pathless_unbound(self, hashtype):
        """
        Test basic characteristics of very simple MerkleTrees created
        using a specific SHA hash type.
        """
        (dir_name1, dir_name2) = self.get_two_unique_directory_names()

        check_hashtype(hashtype)
        tree1 = MerkleTree(dir_name1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        if hashtype == HashTypes.SHA1:
            self.assertEqual(SHA1_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.SHA2:
            self.assertEqual(SHA2_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.SHA3:
            self.assertEqual(SHA3_HEX_NONE, tree1.hex_hash)
        elif hashtype == HashTypes.BLAKE2B_256:
            self.assertEqual(BLAKE2B_256_HEX_NONE, tree1.hex_hash)
        else:
            raise NotImplementedError
        tree2 = MerkleTree(dir_name2, hashtype)
        self.assertEqual(dir_name2, tree2.name)

        # these tests remain skimpy
        self.assertFalse(tree1 is None)
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)

        # there should be no indent on the first line
        self.assertFalse(tree1_str[0] == ' ')

        # no extra lines should be added
        lines = tree1_str.split('\n')
        # this split generates an extra blank line, because the serialization
        # ends with CR-LF
        if lines[-1] == '':
            lines = lines[:-1]
        self.assertEqual(1, len(lines))

        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    def test_bound_flat_dirs(self):
        """
        Test handling of flat directories with a few data files
        using varioush SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            self.do_test_bound_flat_dirs(using)

    def do_test_bound_flat_dirs(self, hashtype):
        """test directory is single level, with four data files"""

        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(ONE, FOUR)
        tree1 = MerkleTree.create_from_file_system(dir_path1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(FOUR, len(nodes1))
        self.verify_tree_sha(tree1, dir_path1, hashtype)

        tree2 = MerkleTree.create_from_file_system(dir_path2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(FOUR, len(nodes2))
        self.verify_tree_sha(tree2, dir_path2, hashtype)

        self.assertFalse(tree1 is None)
        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)
        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
        self.assertTrue(tree1 == tree1_rebuilt)

    def test_bound_needle_dirs(self):
        """
        Test directories four deep with various SHA hash types.
        """
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            self.do_test_bound_needle_dirs(using)

    def do_test_bound_needle_dirs(self, hashtype):
        """test directories four deep with one data file at the lowest level"""
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        tree1 = MerkleTree.create_from_file_system(dir_path1, hashtype)

        self.assertEqual(dir_name1, tree1.name)
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(ONE, len(nodes1))
        self.verify_tree_sha(tree1, dir_path1, hashtype)

        tree2 = MerkleTree.create_from_file_system(dir_path2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(ONE, len(nodes2))
        self.verify_tree_sha(tree2, dir_path2, hashtype)

        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1_str = tree1.to_string(0)
        tree1_rebuilt = MerkleTree.create_from_serialization(
            tree1_str, hashtype)
#       # DEBUG
#       print "NEEDLEDIR TREE1:\n" + tree1Str
#       print "REBUILT TREE1:\n" + tree1Rebuilt.toString("")
#       # END
        self.assertTrue(tree1 == tree1_rebuilt)

    # tests of bugs previously found --------------------------------

    def test_gray_boxes_bug1(self):
        """
        Verify that bug #1 in handling serialization of grayboxes website
        has been corrected.
        """
        serialization =\
            '721a08022dd26e7be98b723f26131786fd2c0dc3 grayboxes.com/\n' +\
            ' fcd3973c66230b9078a86a5642b4c359fe72d7da images/\n' +\
            '  15e47f4eb55197e1bfffae897e9d5ce4cba49623 grayboxes.gif\n' +\
            ' 2477b9ea649f3f30c6ed0aebacfa32cb8250f3df index.html\n'

        # create from string array ----------------------------------
        string = serialization.split('\n')
        string = string[:-1]
        self.assertEqual(4, len(string))

        tree2 = MerkleTree.create_from_string_array(string, HashTypes.SHA1)

        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(
            serialization, HashTypes.SHA1)

        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        self.assertTrue(tree1 == tree2)

        # 2014-06-26 tagged this on here to test firstLineRE_1()
        first_line = string[0]
        match_ = MerkleTree.first_line_re_1().match(first_line)
        self.assertTrue(match_ is not None)
        self.assertEqual(match_.group(1), '')               # indent
        tree_hash = match_.group(2)
        dir_name = match_.group(3)
        self.assertEqual(tree_hash + ' ' + dir_name, first_line)

    def test_xlattice_bug1(self):
        """
        this test relies on dat.xlattice.org being locally present
        and an internally consistent merkleization
        """
        with open('tests/test_data/dat.xlattice.org', 'rb') as file:
            serialization = str(file.read(), 'utf-8')

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(
            serialization, HashTypes.SHA1)

#       # DEBUG
#       print "tree1 has %d nodes" % len(tree1.nodes)
#       with open('junk.tree1', 'w') as t:
#           t.write( tree1.toString(0) )
#       # END

        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        # create from string array ----------------------------------
        string = serialization.split('\n')
        string = string[:-1]
        self.assertEqual(2511, len(string))

        tree2 = MerkleTree.create_from_string_array(string, HashTypes.SHA1)

        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        self.assertTrue(tree1 == tree2)

    def test_gray_boxes_bug3(self):
        """ Test solution to bug in handling grayboxes website. """

        serialization =\
            '088d0e391e1a4872329e0f7ac5d45b2025363e26c199a7' + \
            '4ea39901d109afd6ba grayboxes.com/\n' +\
            ' 24652ddc14687866e6b1251589aee7e1e3079a87f80cd' + \
            '7775214f6d837612a90 images/\n' +\
            '  1eb774eef9be1e696f69a2f95711be37915aac283bb4' + \
            'b34dcbaf7d032233e090 grayboxes.gif\n' +\
            ' 6eacebda9fd55b59c0d2e48e2ed59ce9fd683379592f8' + \
            'e662b1de88e041f53c9 index.html\n'

        # create from string array ----------------------------------
        string = serialization.split('\n')
        string = string[:-1]
        self.assertEqual(4, len(string))

        tree2 = MerkleTree.create_from_string_array(string, HashTypes.SHA2)

        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(
            serialization, HashTypes.SHA2)

        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        self.assertTrue(tree1 == tree2)

        # 2014-06-26 tagged this on here to test firstLineRE_1()
        first_line = string[0]
        match_ = MerkleTree.first_line_re_2().match(first_line)
        self.assertTrue(match_ is not None)
        self.assertEqual(match_.group(1), '')               # indent
        tree_hash = match_.group(2)
        dir_name = match_.group(3)
        self.assertEqual(tree_hash + ' ' + dir_name, first_line)

    def test_xlattice_bug3(self):
        """
        this test relies on dat2.xlattice.org being locally present
        and an internally consistent merkleization
        """
        with open('tests/test_data/dat2.xlattice.org', 'rb') as file:
            serialization = str(file.read(), 'utf-8')

        # create from serialization ---------------------------------
        tree1 = MerkleTree.create_from_serialization(
            serialization, HashTypes.SHA2)

        ser1 = tree1.to_string(0)
        self.assertEqual(serialization, ser1)

        # create from string array ----------------------------------
        string = serialization.split('\n')
        string = string[:-1]
        self.assertEqual(2511, len(string))

        tree2 = MerkleTree.create_from_string_array(string, HashTypes.SHA2)

        ser2 = tree2.to_string(0)
        self.assertEqual(serialization, ser2)

        self.assertTrue(tree1 == tree2)
Esempio n. 5
0
class TestMerkleDoc(unittest.TestCase):
    """ Test MerkleTree functionality at the document level. """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    def get_two_unique_directory_names(self):
        """
        Get two candidate directory names, making sure that they differ.
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Create a test directory with the name, depth, and width specified.
        The directory is under tmp/ ; subdirectories have random names
        and contents.
        """
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Create two test directories under tmp/ with distinct names but the
        depth and width specified.
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    def verify_leaf_sha256(self, node, path_to_file):
        """
        Verify that the content keys of the named file match the SHA
        hash of its contents.
        """
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertFalse(data is None)
        sha = hashlib.sha256()
        sha.update(data)
        hash_ = sha.digest()
        self.assertEqual(hash_, node.bin_hash)

    def verify_tree_sha256(self, node, path_to_tree):
        """
        Verify that the names (content keys) of files below the node
        (a Merkletree) have correct content keys, matching the SHA
        hash of the files.
        """
        if node.nodes is None:
            self.assertEqual(None, node.bin_hash)
        else:
            hash_count = 0
            sha = hashlib.sha256()
            for node_ in node.nodes:
                path_to_node = os.path.join(path_to_tree, node_.name)
                if isinstance(node_, MerkleLeaf):
                    self.verify_leaf_sha256(node_, path_to_node)
                elif isinstance(node_, MerkleTree):
                    self.verify_tree_sha256(node_, path_to_node)
                else:
                    print("DEBUG: unknown node type!")
                    self.fail("unknown node type!")
                if node_.bin_hash is not None:
                    hash_count += 1
                    sha.update(node_.bin_hash)

            if hash_count == 0:
                self.assertEqual(None, node.bin_hash)
            else:
                self.assertEqual(sha.digest(), node.bin_hash)

    # actual unit tests #############################################

    def test_bound_flat_dirs(self):
        """test directory is single level, with four data files"""

        dir_name1, dir_path1, dir_name2, dir_path2 = \
            self.make_two_test_directories(ONE, FOUR)
        doc1 = MerkleDoc.create_from_file_system(dir_path1)
        # pylint: disable=no-member
        tree1 = doc1.tree
        # XXX This succeeds BUT pylint doesn't get this right: it sees
        # doc1.tree as a function
        self.assertTrue(isinstance(tree1, MerkleTree))

        # pylint: disable=no-member
        self.assertEqual(dir_name1, tree1.name)
        self.assertTrue(doc1.bound)
        self.assertEqual(("tmp/%s" % dir_name1), dir_path1)
        # pylint: disable=no-member
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(FOUR, len(nodes1))
        self.verify_tree_sha256(tree1, dir_path1)

        doc2 = MerkleDoc.create_from_file_system(dir_path2)
        tree2 = doc2.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name2, tree2.name)
        self.assertTrue(doc2.bound)
        self.assertEqual(("tmp/%s" % dir_name2), dir_path2)
        # pylint: disable=no-member
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(FOUR, len(nodes2))
        self.verify_tree_sha256(tree2, dir_path2)

        # pylint: disable=no-member
        self.assertTrue(tree1.equal(tree1))
        # pylint: disable=no-member
        self.assertFalse(tree1.equal(tree2))
        # pylint: disable=no-member
        self.assertFalse(tree1.equal(None))

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str)
        self.assertTrue(doc1.equal(doc1_rebuilt))  # MANGO

    def test_bound_needle_dirs(self):
        """test directories four deep with one data file at the lowest level"""
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        doc1 = MerkleDoc.create_from_file_system(dir_path1)
        tree1 = doc1.tree
        # XXX This succeeds BUT pylint doesn't get this right: it sees
        # doc1.tree as a function
        self.assertTrue(isinstance(tree1, MerkleTree))

        # pylint: disable=no-member
        self.assertEqual(dir_name1, tree1.name)
        self.assertTrue(doc1.bound)
        self.assertEqual(("tmp/%s" % dir_name1), dir_path1)
        # pylint: disable=no-member
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(ONE, len(nodes1))
        self.verify_tree_sha256(tree1, dir_path1)

        doc2 = MerkleDoc.create_from_file_system(dir_path2)
        tree2 = doc2.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name2, tree2.name)
        self.assertTrue(doc2.bound)
        self.assertEqual(("tmp/%s" % dir_name2), dir_path2)
        # pylint: disable=no-member
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(ONE, len(nodes2))
        self.verify_tree_sha256(tree2, dir_path2)

        self.assertTrue(doc1.equal(doc1))
        self.assertFalse(doc1.equal(doc2))

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str)
#       # DEBUG
#       print "needle doc:\n" + doc1Str
#       print "rebuilt needle doc:\n" + doc1Rebuilt.toString()
#       # END
        self.assertTrue(doc1.equal(doc1_rebuilt))       # FOO
Esempio n. 6
0
class TestRandomDir(unittest.TestCase):
    """ Test building quasi-random data files and directory structures. """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions #############################################

    # actual unit tests #############################################

    def do_test_random_dir(self, hashtype):
        """ Test building random directories with specific SHA hash type. """
        check_hashtype(hashtype)
        depth = 1 + self.rng.next_int16(3)       # so 1 to 3
        width = 1 + self.rng.next_int16(16)      # so 1 to 16

        blk_count = 1 + self.rng.next_int16(3)     # so 1 to 3
        # last block will usually be only partically populated
        max_len = BuildList.BLOCK_SIZE * (blk_count - 1) +\
            self.rng.next_int16(BuildList.BLOCK_SIZE)
        min_len = 1

        # we want the directory name to be unique
        path_to_dir = os.path.join('tmp', self.rng.next_file_name(8))
        while os.path.exists(path_to_dir):
            path_to_dir = os.path.join('tmp', self.rng.next_file_name(8))

        self.rng.next_data_dir(path_to_dir, depth, width, max_len, min_len)

        data = bytearray(max_len)            # that many null bytes
        self.rng.next_bytes(data)            # fill with random data
        if hashtype == HashTypes.SHA1:
            sha = hashlib.sha1()
        elif hashtype == HashTypes.SHA2:
            sha = hashlib.sha256()
        elif hashtype == HashTypes.SHA3:
            # pylint:disable=no-member
            sha = hashlib.sha3_256()
        elif hashtype == HashTypes.BLAKE2B:
            sha = hashlib.blake2b(digest_size=32)
        else:
            raise NotImplementedError
        sha.update(data)
        hash_ = sha.hexdigest()
        file_name = self.rng.next_file_name(8)
        path_to_file = os.path.join('tmp', file_name)
        while os.path.exists(path_to_file):
            file_name = self.rng.next_file_name(8)
            path_to_file = os.path.join('tmp', file_name)

        with open(path_to_file, 'wb') as file:
            file.write(data)

        if hashtype == HashTypes.SHA1:
            file_hash = file_sha1hex(path_to_file)
        elif hashtype == HashTypes.SHA2:
            file_hash = file_sha2hex(path_to_file)
        elif hashtype == HashTypes.SHA3:
            file_hash = file_sha3hex(path_to_file)
        elif hashtype == HashTypes.BLAKE2B:
            file_hash = file_blake2b_hex(path_to_file)
        else:
            raise NotImplementedError
        self.assertEqual(hash_, file_hash)

    def test_random_dir(self):
        """ Test building random directories with supported SHA hash types. """
        for hashtype in HashTypes:
            self.do_test_random_dir(hashtype)
Esempio n. 7
0
class TestMerkleDoc(unittest.TestCase):
    """ Test MerkleTree functionality at the document level. """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    def get_two_unique_directory_names(self):
        """
        Get two candidate directory names, making sure that they differ.
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Create a test directory with the name, depth, and width specified.
        The directory is under tmp/ ; subdirectories have random names
        and contents.
        """
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Create two test directories under tmp/ with distinct names but the
        depth and width specified.
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    def verify_leaf_sha256(self, node, path_to_file):
        """
        Verify that the content keys of the named file match the SHA
        hash of its contents.
        """
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertFalse(data is None)
        sha = XLSHA2()
        sha.update(data)
        hash_ = sha.digest()
        self.assertEqual(hash_, node.bin_hash)

    def verify_tree_sha256(self, node, path_to_tree):
        """
        Verify that the names (content keys) of files below the node
        (a Merkletree) have correct content keys, matching the SHA
        hash of the files.
        """
        if node.nodes is None:
            self.assertEqual(None, node.bin_hash)
        else:
            hash_count = 0
            sha = XLSHA2()
            for node_ in node.nodes:
                path_to_node = os.path.join(path_to_tree, node_.name)
                if isinstance(node_, MerkleLeaf):
                    self.verify_leaf_sha256(node_, path_to_node)
                elif isinstance(node_, MerkleTree):
                    self.verify_tree_sha256(node_, path_to_node)
                else:
                    print("DEBUG: unknown node type!")
                    self.fail("unknown node type!")
                if node_.bin_hash is not None:
                    hash_count += 1
                    sha.update(node_.bin_hash)

            if hash_count == 0:
                self.assertEqual(None, node.bin_hash)
            else:
                self.assertEqual(sha.digest(), node.bin_hash)

    # actual unit tests #############################################

    def test_bound_flat_dirs(self):
        """test directory is single level, with four data files"""

        dir_name1, dir_path1, dir_name2, dir_path2 = \
            self.make_two_test_directories(ONE, FOUR)
        doc1 = MerkleDoc.create_from_file_system(dir_path1)
        # pylint: disable=no-member
        tree1 = doc1.tree
        self.assertTrue(isinstance(tree1, MerkleTree))

        # pylint: disable=no-member
        self.assertEqual(dir_name1, tree1.name)
        self.assertTrue(doc1.bound)
        self.assertEqual(("tmp/%s" % dir_name1), dir_path1)
        # pylint: disable=no-member
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(FOUR, len(nodes1))
        self.verify_tree_sha256(tree1, dir_path1)

        doc2 = MerkleDoc.create_from_file_system(dir_path2)
        tree2 = doc2.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name2, tree2.name)
        self.assertTrue(doc2.bound)
        self.assertEqual(("tmp/%s" % dir_name2), dir_path2)
        # pylint: disable=no-member
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(FOUR, len(nodes2))
        self.verify_tree_sha256(tree2, dir_path2)

        # pylint: disable=no-member
        self.assertTrue(tree1 == tree1)
        # pylint: disable=no-member
        self.assertFalse(tree1 == tree2)
        # pylint: disable=no-member
        self.assertFalse(tree1 is None)

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str)
        self.assertTrue(doc1 == doc1_rebuilt)

    def test_bound_needle_dirs(self):
        """test directories four deep with one data file at the lowest level"""
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        doc1 = MerkleDoc.create_from_file_system(dir_path1)
        tree1 = doc1.tree
        self.assertTrue(isinstance(tree1, MerkleTree))

        # pylint: disable=no-member
        self.assertEqual(dir_name1, tree1.name)
        self.assertTrue(doc1.bound)
        self.assertEqual(("tmp/%s" % dir_name1), dir_path1)
        # pylint: disable=no-member
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(ONE, len(nodes1))
        self.verify_tree_sha256(tree1, dir_path1)

        doc2 = MerkleDoc.create_from_file_system(dir_path2)
        tree2 = doc2.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name2, tree2.name)
        self.assertTrue(doc2.bound)
        self.assertEqual(("tmp/%s" % dir_name2), dir_path2)
        # pylint: disable=no-member
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(ONE, len(nodes2))
        self.verify_tree_sha256(tree2, dir_path2)

        self.assertTrue(doc1 == doc1)
        self.assertFalse(doc1 == doc2)

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str)
#       # DEBUG
#       print "needle doc:\n" + doc1Str
#       print "rebuilt needle doc:\n" + doc1Rebuilt.toString()
#       # END
        self.assertTrue(doc1 == doc1_rebuilt)       # FOO
Esempio n. 8
0
class TestMerkleDoc(unittest.TestCase):
    """ Test Merkletree functionality at the Document level. """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions #############################################
    def get_two_unique_directory_names(self):
        """ Generate two quasi-random directory names. """

        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Create a randomly named directory under tmp/, removing any
        existing directory of that name.
        """

        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            if os.path.isfile(dir_path):
                os.unlink(dir_path)
            elif os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """
        Generate two different names, using them to create subdirectories
        of tmp/.
        """
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    def verify_leaf_hash(self, node, path_to_file, hashtype):
        """
        Verify that a MerkleLeaf correctly describes a file, given a hash type.
        """
        check_hashtype(hashtype)
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertFalse(data is None)
        if hashtype == HashTypes.SHA1:
            sha = XLSHA1()
        elif hashtype == HashTypes.SHA2:
            sha = XLSHA2()
        elif hashtype == HashTypes.SHA3:
            # pylint: disable=no-member
            sha = XLSHA3()
        elif hashtype == HashTypes.BLAKE2B_256:
            # pylint: disable=no-member
            sha = XLBLAKE2B_256()
        else:
            raise NotImplementedError
        sha.update(data)
        hash_ = sha.digest()
        self.assertEqual(hash_, node.bin_hash)

    def verify_tree_hash(self, node, path_to_tree, hashtype):
        """
        Given a MerkleTree, verify that it correctly describes the
        directory whose path is passed.
        """
        # we assume that the node is a MerkleTree
        check_hashtype(hashtype)
        if node.nodes is None:
            self.assertEqual(None, node.bin_hash)
        else:
            hash_count = 0
            if hashtype == HashTypes.SHA1:
                sha = XLSHA1()
            elif hashtype == HashTypes.SHA2:
                sha = XLSHA2()
            elif hashtype == HashTypes.SHA3:
                # pylint: disable=no-member
                sha = XLSHA3()
            elif hashtype == HashTypes.BLAKE2B_256:
                sha = XLBLAKE2B_256()
            else:
                raise NotImplementedError
            for node_ in node.nodes:
                path_to_node = os.path.join(path_to_tree, node_.name)
                if isinstance(node_, MerkleLeaf):
                    self.verify_leaf_hash(node_, path_to_node, hashtype)
                elif isinstance(node_, MerkleTree):
                    self.verify_tree_hash(node_, path_to_node, hashtype)
                else:
                    print("DEBUG: unknown node type!")
                    self.fail("unknown node type!")
                if node_.bin_hash is not None:
                    hash_count += 1
                    sha.update(node_.bin_hash)

            if hash_count == 0:
                self.assertEqual(None, node.bin_hash)
            else:
                self.assertEqual(sha.digest(), node.bin_hash)

    # actual unit tests #############################################

    def test_bound_flat_dirs(self):
        """test directory is single level, with four data files"""
        for hashtype in HashTypes:
            self.do_test_bound_flat_dirs(hashtype)

    def do_test_bound_flat_dirs(self, hashtype):
        """ Test two flat directories with the specified hash type. """

        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(ONE, FOUR)

        doc1 = MerkleDoc.create_from_file_system(dir_path1, hashtype)
        tree1 = doc1.tree
        self.assertTrue(isinstance(tree1, MerkleTree))
        # pylint: disable=no-member
        self.assertEqual(dir_name1, tree1.name)
        self.assertTrue(doc1.bound)
        self.assertEqual(("tmp/%s" % dir_name1), dir_path1)
        # pylint: disable=no-member
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(FOUR, len(nodes1))
        self.verify_tree_hash(tree1, dir_path1, hashtype)

        doc2 = MerkleDoc.create_from_file_system(dir_path2, hashtype)
        tree2 = doc2.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name2, tree2.name)
        self.assertTrue(doc2.bound)
        self.assertEqual(("tmp/%s" % dir_name2), dir_path2)
        # pylint: disable=no-member
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(FOUR, len(nodes2))
        self.verify_tree_hash(tree2, dir_path2, hashtype)

        self.assertEqual(tree1, tree1)
        self.assertFalse(tree1 == tree2)
        self.assertFalse(tree1 is None)

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str, hashtype)
        # DEBUG
        # print("flat doc:\n" + doc1Str)
        # print("rebuilt flat doc:\n" + doc1Rebuilt.toString())
        # END
        self.assertTrue(doc1 == doc1_rebuilt)

    def test_bound_needle_dirs(self):
        """test directories four deep with one data file at the lowest level"""
        for hashtype in HashTypes:
            self.do_test_bound_needle_dirs(hashtype)

    def do_test_bound_needle_dirs(self, hashtype):
        """ Run tests on two deeper directories. """
        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        doc1 = MerkleDoc.create_from_file_system(dir_path1, hashtype)
        tree1 = doc1.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name1, tree1.name)
        self.assertTrue(doc1.bound)
        self.assertEqual(("tmp/%s" % dir_name1), dir_path1)
        # pylint: disable=no-member
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(ONE, len(nodes1))
        self.verify_tree_hash(tree1, dir_path1, hashtype)

        doc2 = MerkleDoc.create_from_file_system(dir_path2, hashtype)
        tree2 = doc2.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name2, tree2.name)
        self.assertTrue(doc2.bound)
        self.assertEqual(("tmp/%s" % dir_name2), dir_path2)
        # pylint: disable=no-member
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(ONE, len(nodes2))
        self.verify_tree_hash(tree2, dir_path2, hashtype)

        self.assertTrue(doc1 == doc1)
        self.assertFalse(doc1 == doc2)

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str, hashtype)
#       # DEBUG
#       print "needle doc:\n" + doc1Str
#       print "rebuilt needle doc:\n" + doc1Rebuilt.toString()
#       # END
        self.assertTrue(doc1 == doc1_rebuilt)
Esempio n. 9
0
class TestMerkleDoc(unittest.TestCase):

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions #############################################
    def get_two_unique_directory_names(self):
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    def verify_leaf_sha(self, node, path_to_file, hashtype):
        check_hashtype(hashtype)
        self.assertTrue(os.path.exists(path_to_file))
        with open(path_to_file, "rb") as file:
            data = file.read()
        self.assertFalse(data is None)
        # pylint: disable=redefined-variable-type
        if hashtype == HashTypes.SHA1:
            sha = hashlib.sha1()
        elif hashtype == HashTypes.SHA2:
            sha = hashlib.sha256()
        elif hashtype == HashTypes.SHA3:
            # pylint: disable=no-member
            sha = hashlib.sha3_256()
        sha.update(data)
        hash_ = sha.digest()
        self.assertEqual(hash_, node.bin_hash)

    def verify_tree_sha(self, node, path_to_tree, hashtype):
        # we assume that the node is a MerkleTree
        check_hashtype(hashtype)
        if node.nodes is None:
            self.assertEqual(None, node.bin_hash)
        else:
            hash_count = 0
            # pylint: disable=redefined-variable-type
            if hashtype == HashTypes.SHA1:
                sha = hashlib.sha1()
            elif hashtype == HashTypes.SHA2:
                sha = hashlib.sha256()
            elif hashtype == HashTypes.SHA3:
                # pylint: disable=no-member
                sha = hashlib.sha3_256()
            for node_ in node.nodes:
                path_to_node = os.path.join(path_to_tree, node_.name)
                if isinstance(node_, MerkleLeaf):
                    self.verify_leaf_sha(node_, path_to_node, hashtype)
                elif isinstance(node_, MerkleTree):
                    self.verify_tree_sha(node_, path_to_node, hashtype)
                else:
                    print("DEBUG: unknown node type!")
                    self.fail("unknown node type!")
                if node_.bin_hash is not None:
                    hash_count += 1
                    sha.update(node_.bin_hash)

            if hash_count == 0:
                self.assertEqual(None, node.bin_hash)
            else:
                self.assertEqual(sha.digest(), node.bin_hash)

    # actual unit tests #############################################

    def test_bound_flat_dirs(self):
        """test directory is single level, with four data files"""
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            self.do_test_bound_flat_dirs(using)

    def do_test_bound_flat_dirs(self, hashtype):

        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(ONE, FOUR)

        doc1 = MerkleDoc.create_from_file_system(dir_path1, hashtype)
        tree1 = doc1.tree
        self.assertTrue(isinstance(tree1, MerkleTree))
        # pylint: disable=no-member
        self.assertEqual(dir_name1, tree1.name)
        self.assertTrue(doc1.bound)
        self.assertEqual(("tmp/%s" % dir_name1), dir_path1)
        # pylint: disable=no-member
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(FOUR, len(nodes1))
        self.verify_tree_sha(tree1, dir_path1, hashtype)

        doc2 = MerkleDoc.create_from_file_system(dir_path2, hashtype)
        tree2 = doc2.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name2, tree2.name)
        self.assertTrue(doc2.bound)
        self.assertEqual(("tmp/%s" % dir_name2), dir_path2)
        # pylint: disable=no-member
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(FOUR, len(nodes2))
        self.verify_tree_sha(tree2, dir_path2, hashtype)

        self.assertEqual(tree1, tree1)
        self.assertFalse(tree1 == tree2)
        self.assertFalse(tree1 is None)

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str, hashtype)
        # DEBUG
        #print("flat doc:\n" + doc1Str)
        #print("rebuilt flat doc:\n" + doc1Rebuilt.toString())
        # END
        self.assertTrue(doc1.equal(doc1_rebuilt))  # MANGO

    def test_bound_needle_dirs(self):
        """test directories four deep with one data file at the lowest level"""
        for using in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            self.do_test_bound_needle_dirs(using)

    def do_test_bound_needle_dirs(self, hashtype):
        check_hashtype(hashtype)
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        doc1 = MerkleDoc.create_from_file_system(dir_path1, hashtype)
        tree1 = doc1.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name1, tree1.name)
        self.assertTrue(doc1.bound)
        self.assertEqual(("tmp/%s" % dir_name1), dir_path1)
        # pylint: disable=no-member
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(ONE, len(nodes1))
        self.verify_tree_sha(tree1, dir_path1, hashtype)

        doc2 = MerkleDoc.create_from_file_system(dir_path2, hashtype)
        tree2 = doc2.tree
        # pylint: disable=no-member
        self.assertEqual(dir_name2, tree2.name)
        self.assertTrue(doc2.bound)
        self.assertEqual(("tmp/%s" % dir_name2), dir_path2)
        # pylint: disable=no-member
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(ONE, len(nodes2))
        self.verify_tree_sha(tree2, dir_path2, hashtype)

        self.assertTrue(doc1.equal(doc1))
        self.assertFalse(doc1.equal(doc2))

        doc1_str = doc1.to_string()
        doc1_rebuilt = MerkleDoc.create_from_serialization(doc1_str, hashtype)
#       # DEBUG
#       print "needle doc:\n" + doc1Str
#       print "rebuilt needle doc:\n" + doc1Rebuilt.toString()
#       # END
        self.assertTrue(doc1.equal(doc1_rebuilt))       # FOO
Esempio n. 10
0
class TestNLHTree2(unittest.TestCase):
    """ Test trees derived from various quasi-random directory structures. """

    def setUp(self):
        self.rng = SimpleRNG(time.time())

    def tearDown(self):
        pass

    # utility functions ---------------------------------------------

    def get_two_unique_directory_names(self):
        """ Make two unique directory names. """

        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        self.assertTrue(len(dir_name1) > 0)
        self.assertTrue(len(dir_name2) > 0)
        self.assertTrue(dir_name1 != dir_name2)
        return (dir_name1, dir_name2)

    def make_one_named_test_directory(self, name, depth, width):
        """
        Create a test directory below tmp/ with specified characteristics.
        """

        dir_path = "tmp/%s" % name
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
        self.rng.next_data_dir(dir_path, depth, width, 32)
        return dir_path

    def make_two_test_directories(self, depth, width):
        """ Make two distinct quasi-random test directories below tmp/. """

        dir_name1 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path1 = self.make_one_named_test_directory(dir_name1, depth, width)

        dir_name2 = dir_name1
        while dir_name2 == dir_name1:
            dir_name2 = self.rng.next_file_name(MAX_NAME_LEN)
        dir_path2 = self.make_one_named_test_directory(dir_name2, depth, width)

        return (dir_name1, dir_path1, dir_name2, dir_path2)

    # unit tests ----------------------------------------------------

    def test_pathless_unbound(self):
        """ Test the constructor using various hash types. """

        for hashtype in [HashTypes.SHA1, HashTypes.SHA2, HashTypes.SHA3, ]:
            self.do_test_pathless_unbound(hashtype)

    def do_test_pathless_unbound(self, hashtype):
        """
        Test constructor using two directories and a specific hash type.
        """

        (dir_name1, dir_name2) = self.get_two_unique_directory_names()

        check_hashtype(hashtype)
        tree1 = NLHTree(dir_name1, hashtype)
        self.assertEqual(dir_name1, tree1.name)
        self.assertEqual(tree1.hashtype, hashtype)

        tree2 = NLHTree(dir_name2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        self.assertEqual(tree2.hashtype, hashtype)

        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)
        self.assertFalse(tree1 is None)

        tree1c = tree1.clone()
        self.assertEqual(tree1c, tree1)

    def test_bound_flat_dirs(self):
        """
        Test directory is single level, with four data files, using
        various hash types.
        """
        for hashtype in HashTypes:
            self.do_test_bound_flat_dirs(hashtype)

    def do_test_bound_flat_dirs(self, hashtype):
        """
        Test directory is single level, with four data files, using
        specific hash type.
        """

        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(ONE, FOUR)
        tree1 = NLHTree.create_from_file_system(dir_path1, hashtype)
        self.assertEqual(dir_name1, tree1.name, True)
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(FOUR, len(nodes1))

        tree2 = NLHTree.create_from_file_system(dir_path2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(FOUR, len(nodes2))

        self.assertEqual(tree1, tree1)
        self.assertFalse(tree1 == tree2)
        self.assertFalse(tree1 is None)

        tree1c = tree1.clone()
        self.assertEqual(tree1c, tree1)

    def test_bound_needle_dirs1(self):
        """
        Test directories four deep with one data file at the lowest level
        using various hash types.
        """
        for hashtype in HashTypes:
            self.do_test_bound_needle_dirs(hashtype)

    def do_test_bound_needle_dirs(self, hashtype):
        """
        Test directories four deep with one data file at the lowest level
        using specific hash type.
        """
        (dir_name1, dir_path1, dir_name2, dir_path2) =\
            self.make_two_test_directories(FOUR, ONE)
        tree1 = NLHTree.create_from_file_system(dir_path1, hashtype)

        self.assertEqual(dir_name1, tree1.name)
        nodes1 = tree1.nodes
        self.assertTrue(nodes1 is not None)
        self.assertEqual(ONE, len(nodes1))

        tree2 = NLHTree.create_from_file_system(dir_path2, hashtype)
        self.assertEqual(dir_name2, tree2.name)
        nodes2 = tree2.nodes
        self.assertTrue(nodes2 is not None)
        self.assertEqual(ONE, len(nodes2))

        self.assertTrue(tree1 == tree1)
        self.assertFalse(tree1 == tree2)

        tree1c = tree1.clone()
        self.assertEqual(tree1c, tree1)