def find_and_validate_md5_checksums(in_folder, md5key_filename):
    """
    Validate the MD5 checksums listed in key files found below a folder.

    Every file named ``md5key_filename`` that ``locate`` finds under
    ``in_folder`` is read line by line. Each line is expected to follow
    this structure:

    hashkey filepath

    where ``filepath`` is resolved relative to the directory containing
    the key file. Lines that do not split into exactly two
    space-separated fields are skipped without validation.

    :param in_folder: root folder to search for key files.
    :param md5key_filename: name of the key file(s) to look for.
    :return: list of the full paths of all files whose checksum matched.
    :raise InvenioFileChecksumError: as soon as a listed checksum differs
        from the one reported by ``calculate_md5_external``.
    """
    validated_files = []
    for filename in locate(md5key_filename, root=in_folder):
        # Read the key file inside a context manager so the handle is
        # always closed, even when a checksum mismatch raises below.
        with open(filename, 'r') as file_fd:
            contents = file_fd.readlines()
        # Targets are listed relative to the key file's own directory.
        key_dir = os.path.dirname(filename)
        for line in contents:
            split_line = line.split(' ')
            if len(split_line) != 2:
                # Not a "hashkey filepath" pair; ignore the line.
                continue
            hashkey, hashkey_target = split_line
            hashkey_target = os.path.join(key_dir, hashkey_target.strip())
            hashkey = hashkey.strip()
            found_hashkey = calculate_md5_external(hashkey_target).strip()
            if found_hashkey != hashkey:
                raise InvenioFileChecksumError("Error matching checksum of %s:"
                                               " %s is not equal to %s" %
                                               (hashkey_target,
                                                found_hashkey,
                                                hashkey))
            validated_files.append(hashkey_target)
    return validated_files
def find_and_validate_md5_checksums(in_folder, md5key_filename):
    """
    Validate the MD5 checksums listed in key files found below a folder.

    Every file named ``md5key_filename`` that ``locate`` finds under
    ``in_folder`` is read line by line. Each line is expected to follow
    this structure:

    hashkey filepath

    where ``filepath`` is resolved relative to the directory containing
    the key file. Lines that do not split into exactly two
    space-separated fields are skipped without validation.

    :param in_folder: root folder to search for key files.
    :param md5key_filename: name of the key file(s) to look for.
    :return: list of the full paths of all files whose checksum matched.
    :raise InvenioFileChecksumError: as soon as a listed checksum differs
        from the one reported by ``calculate_md5_external``.
    """
    validated_files = []
    for filename in locate(md5key_filename, root=in_folder):
        # Read the key file inside a context manager so the handle is
        # always closed, even when a checksum mismatch raises below.
        with open(filename, 'r') as file_fd:
            contents = file_fd.readlines()
        # Targets are listed relative to the key file's own directory.
        key_dir = os.path.dirname(filename)
        for line in contents:
            split_line = line.split(' ')
            if len(split_line) != 2:
                # Not a "hashkey filepath" pair; ignore the line.
                continue
            hashkey, hashkey_target = split_line
            hashkey_target = os.path.join(key_dir, hashkey_target.strip())
            hashkey = hashkey.strip()
            found_hashkey = calculate_md5_external(hashkey_target).strip()
            if found_hashkey != hashkey:
                raise InvenioFileChecksumError(
                    "Error matching checksum of %s:"
                    " %s is not equal to %s" %
                    (hashkey_target, found_hashkey, hashkey))
            validated_files.append(hashkey_target)
    return validated_files
Exemple #3
0
 def test_md5_algorithms(self):
     """bibdocfile - compare md5 algorithms"""
     from invenio.bibdocfile import calculate_md5, \
         calculate_md5_external
     filepath = os.path.join(self.path, 'test.txt')
     # Close the file before hashing so the content is guaranteed to be
     # flushed to disk when both implementations read it.
     with open(filepath, "w") as test_fd:
         test_fd.write("test")
     # Both implementations must report the same digest for the file.
     self.assertEqual(calculate_md5(filepath, force_internal=True),
                      calculate_md5_external(filepath))
Exemple #4
0
    def test_md5_check(self):
        """
        Test md5 checking done by APS Harvester.

        A target file and a key file listing its checksum are created in
        /tmp; the validator is expected to report exactly one validated
        file. Both temporary files are removed afterwards.
        """
        created_files = []
        try:
            # Create temporary file to test with
            hashtarget_filepath = get_temporary_file(directory="/tmp")
            created_files.append(hashtarget_filepath)
            with open(hashtarget_filepath, "w") as tmpfd:
                tmpfd.write("this is a test")
            hashtarget_name = os.path.basename(hashtarget_filepath)

            hashtarget_md5 = calculate_md5_external(hashtarget_filepath)

            # Create a md5 keyfile looking like:
            # 54b0c58c7ce9f2a8b551351102ee0938 apsharvest_test_lFecZz
            md5_keyfile = get_temporary_file(directory="/tmp")
            created_files.append(md5_keyfile)
            with open(md5_keyfile, "w") as tmpfd:
                tmpfd.write("%s %s\n" % (hashtarget_md5, hashtarget_name))

            keyfile_dir, keyfile_name = os.path.split(md5_keyfile)
            res = find_and_validate_md5_checksums(in_folder=keyfile_dir,
                                                  md5key_filename=keyfile_name)
            # assertEqual reports the actual length when the check fails.
            self.assertEqual(len(res), 1)
        finally:
            # Do not leave test artefacts behind in /tmp.
            for path in created_files:
                if os.path.exists(path):
                    os.remove(path)
Exemple #5
0
    def test_md5_check(self):
        """
        Test md5 checking done by APS Harvester.

        A target file and a key file listing its checksum are created in
        /tmp; the validator is expected to report exactly one validated
        file. Both temporary files are removed afterwards.
        """
        created_files = []
        try:
            # Create temporary file to test with
            hashtarget_filepath = get_temporary_file(directory="/tmp")
            created_files.append(hashtarget_filepath)
            with open(hashtarget_filepath, 'w') as tmpfd:
                tmpfd.write("this is a test")
            hashtarget_name = os.path.basename(hashtarget_filepath)

            hashtarget_md5 = calculate_md5_external(hashtarget_filepath)

            # Create a md5 keyfile looking like:
            # 54b0c58c7ce9f2a8b551351102ee0938 apsharvest_test_lFecZz
            md5_keyfile = get_temporary_file(directory="/tmp")
            created_files.append(md5_keyfile)
            with open(md5_keyfile, 'w') as tmpfd:
                tmpfd.write("%s %s\n" % (hashtarget_md5, hashtarget_name))

            keyfile_dir, keyfile_name = os.path.split(md5_keyfile)
            res = find_and_validate_md5_checksums(in_folder=keyfile_dir,
                                                  md5key_filename=keyfile_name)
            # assertEqual reports the actual length when the check fails.
            self.assertEqual(len(res), 1)
        finally:
            # Do not leave test artefacts behind in /tmp.
            for path in created_files:
                if os.path.exists(path):
                    os.remove(path)
 def test_md5_algorithms(self):
     """bibdocfile - compare md5 algorithms"""
     filepath = os.path.join(self.path, "test.txt")
     # Close the file before hashing so the content is guaranteed to be
     # flushed to disk when both implementations read it.
     with open(filepath, "w") as test_fd:
         test_fd.write("test")
     # Both implementations must report the same digest for the file.
     self.assertEqual(calculate_md5(filepath, force_internal=True),
                      calculate_md5_external(filepath))