Example #1
0
    def test_md5sum(self):
        """Verify that file_hash.md5sum yields the expected hexadecimal MD5
        digest for each reference file of the test data directory."""

        # (file name in DATA_DIRNAME, expected MD5 hex digest); the files are
        # checked in this order and the first mismatch aborts the test.
        known_digests = (
            ("test_file.txt", "335387a52cfc8f1d3fda510b12dfc200"),
            ("test_file.bin", "8c1379aa4207f5b4b801a49b75e826c8"),
            ("test_file.empty", "d41d8cd98f00b204e9800998ecf8427e"),
        )

        for file_name, expected_digest in known_digests:
            actual_digest = file_hash.md5sum(os.path.join(DATA_DIRNAME, file_name))
            self.assertEqual(actual_digest, expected_digest)
Example #2
0
    def test_md5sum(self):
        """Compare the digest computed by file_hash.md5sum with the known MD5
        of three reference files located in the test data directory."""

        def check_digest(file_name, expected_digest):
            # Hash one file of DATA_DIRNAME and compare it to its known MD5.
            data_path = os.path.join(DATA_DIRNAME, file_name)
            self.assertEqual(file_hash.md5sum(data_path), expected_digest)

        # Test "data/test_file.txt"
        check_digest("test_file.txt", "335387a52cfc8f1d3fda510b12dfc200")

        # Test "data/test_file.bin"
        check_digest("test_file.bin", "8c1379aa4207f5b4b801a49b75e826c8")

        # Test "data/test_file.empty"
        check_digest("test_file.empty", "d41d8cd98f00b204e9800998ecf8427e")
Example #3
0
def walk(root_path, db):
    """Walk the tree starting from "root_path" and build the {path: md5, ...}
    dictionaries of its files and directories.

    The tree is walked bottom-up so that the MD5 of every sub-directory is
    known before the MD5 of its parent is computed. The MD5 of a directory is
    the MD5 of the sorted MD5 hex strings of its children (files and
    sub-directories). Symbolic links are ignored.

    root_path -- the path of the directory to walk.
    db        -- an optional cache (None to disable it): a mapping-like object
                 indexed by absolute file path whose values have the form
                 "<mtime> <size> <md5>". A file whose modification time and
                 size match its record is not hashed again; otherwise the
                 record is (re)written with the freshly computed MD5.

    Return the tuple (file_dict, dir_dict); both dictionaries map absolute
    paths to MD5 hex strings.
    """

    local_file_dict = {}   # dict = {path: md5, ...}
    local_dir_dict = {}    # dict = {path: md5, ...}

    # current_dir_path = a string, the path to the directory.
    # dir_names        = a list of the names (strings) of the subdirectories in
    #                    current_dir_path (excluding '.' and '..').
    # file_names       = a list of the names (strings) of the non-directory files
    #                    in current_dir_path.
    for current_dir_path, dir_names, file_names in os.walk(root_path, topdown=False, followlinks=False):

        # ABSOLUTE PATH OF current_dir_path
        current_dir_path = os.path.abspath(current_dir_path)

        # MAKE THE MD5 LIST OF CURRENT_DIR'S CONTENT (REQUIRED TO COMPUTE
        # CURRENT_DIR'S MD5)
        current_dir_md5_list = []

        # CHILD FILES
        for file_name in file_names:
            file_path = os.path.join(current_dir_path, file_name)

            if os.path.islink(file_path):
                # Links are ignored.
                continue

            file_mtime = os.path.getmtime(file_path)
            file_size = os.path.getsize(file_path)
            file_md5 = None

            if db is not None and file_path in db:
                db_record = db[file_path]
                if isinstance(db_record, bytes):
                    # Some database backends give the stored text back as bytes.
                    db_record = db_record.decode('utf-8')

                try:
                    db_file_mtime, db_file_size, db_file_md5 = db_record.split()
                    # The record holds text: convert the fields back to
                    # numbers, otherwise the comparison with the float mtime
                    # and the int size can never succeed.
                    file_is_unchanged = (float(db_file_mtime) == file_mtime
                                         and int(db_file_size) == file_size)
                except ValueError:
                    # Malformed record: handle it as a cache miss, it is
                    # overwritten below.
                    file_is_unchanged = False

                if file_is_unchanged:
                    # The file is known and hasn't changed since the last
                    # walk => don't compute the MD5, use the one in db.
                    file_md5 = db_file_md5

            if file_md5 is None:
                file_md5 = md5sum(file_path)
                if db is not None:
                    db[file_path] = "{0} {1} {2}".format(file_mtime, file_size, file_md5)

            local_file_dict[file_path] = file_md5

            current_dir_md5_list.append(file_md5)

        # CHILD DIRECTORIES
        for dir_name in dir_names:
            dir_path = os.path.join(current_dir_path, dir_name)

            if os.path.islink(dir_path):
                # Links are ignored.
                continue

            try:
                dir_md5 = local_dir_dict[dir_path]
            except KeyError:
                # "local_dir_dict[dir_path]" should exist as this is a
                # bottom-up tree walk; the sub-directory was presumably not
                # visited by os.walk (e.g. it could not be read).
                warnings.warn("can't access " + dir_path, UserWarning)
            else:
                current_dir_md5_list.append(dir_md5)

        # CURRENT_DIRECTORY'S MD5
        current_dir_md5_generator = hashlib.md5()

        # current_dir_md5_list has to be sorted because even for an identical
        # set of items, a different order implies a different MD5
        current_dir_md5_list.sort()

        for item in current_dir_md5_list:
            current_dir_md5_generator.update(item.encode('utf-8'))
        local_dir_dict[current_dir_path] = current_dir_md5_generator.hexdigest()

    return local_file_dict, local_dir_dict
Example #4
0
def walk(root_path, db):
    """Walk the tree starting from "root_path" and build the {path: md5, ...}
    dictionaries of the files and of the directories it contains.

    The walk is done bottom-up: the MD5 of a directory is computed from the
    sorted MD5 hex strings of its child files and sub-directories, so the
    children have to be processed first. Symbolic links are skipped.

    root_path -- the path of the directory to walk.
    db        -- None, or a mapping-like cache indexed by absolute file path
                 whose values look like "<mtime> <size> <md5>". When the
                 recorded modification time and size of a file match the
                 current ones, the recorded MD5 is reused; otherwise the MD5
                 is computed and the record is written to db.

    Return (file_dict, dir_dict), two dictionaries mapping absolute paths to
    MD5 hex strings.
    """

    local_file_dict = {}  # dict = {path: md5, ...}
    local_dir_dict = {}  # dict = {path: md5, ...}

    # current_dir_path = a string, the path to the directory.
    # dir_names        = a list of the names (strings) of the subdirectories in
    #                    current_dir_path (excluding '.' and '..').
    # file_names       = a list of the names (strings) of the non-directory files
    #                    in current_dir_path.
    for current_dir_path, dir_names, file_names in os.walk(root_path,
                                                           topdown=False,
                                                           followlinks=False):

        # ABSOLUTE PATH OF current_dir_path
        current_dir_path = os.path.abspath(current_dir_path)

        # MAKE THE MD5 LIST OF CURRENT_DIR'S CONTENT (REQUIRED TO COMPUTE
        # CURRENT_DIR'S MD5)
        current_dir_md5_list = []

        # CHILD FILES
        for file_name in file_names:
            file_path = os.path.join(current_dir_path, file_name)

            if not os.path.islink(file_path):
                file_mtime = os.path.getmtime(file_path)
                file_size = os.path.getsize(file_path)
                file_md5 = None

                if db is not None and file_path in db:
                    db_entry = db[file_path]
                    if isinstance(db_entry, bytes):
                        # Some database backends return the stored text as
                        # bytes.
                        db_entry = db_entry.decode('utf-8')

                    db_fields = db_entry.split()
                    if len(db_fields) == 3:
                        db_file_mtime, db_file_size, db_file_md5 = db_fields
                        try:
                            # The fields are text whereas file_mtime is a
                            # float and file_size an int: convert them before
                            # comparing, otherwise the cache never hits.
                            if (float(db_file_mtime) == file_mtime
                                    and int(db_file_size) == file_size):
                                # The file is known and hasn't changed since
                                # the last walk => don't compute the MD5, use
                                # the one in db.
                                file_md5 = db_file_md5
                        except ValueError:
                            # Non numeric fields: handled as a cache miss, the
                            # entry is overwritten below.
                            pass

                if file_md5 is None:
                    file_md5 = md5sum(file_path)
                    if db is not None:
                        db[file_path] = "{0} {1} {2}".format(
                            file_mtime, file_size, file_md5)

                local_file_dict[file_path] = file_md5

                current_dir_md5_list.append(file_md5)

        # CHILD DIRECTORIES
        for dir_name in dir_names:
            dir_path = os.path.join(current_dir_path, dir_name)

            if not os.path.islink(dir_path):
                try:
                    dir_md5 = local_dir_dict[dir_path]
                    current_dir_md5_list.append(dir_md5)
                except KeyError:
                    # "local_dir_dict[dir_path]" should exist as this is a
                    # bottom-up tree walk; the sub-directory was presumably
                    # not visited by os.walk (e.g. it could not be read).
                    warnings.warn("can't access " + dir_path, UserWarning)

        # CURRENT_DIRECTORY'S MD5
        current_dir_md5_generator = hashlib.md5()

        # current_dir_md5_list has to be sorted because even for an identical
        # set of items, a different order implies a different MD5
        current_dir_md5_list.sort()

        for item in current_dir_md5_list:
            current_dir_md5_generator.update(item.encode('utf-8'))
        local_dir_dict[current_dir_path] = current_dir_md5_generator.hexdigest()

    return local_file_dict, local_dir_dict