def find_and_validate_md5_checksums(in_folder, md5key_filename):
    """
    Validate the MD5 checksums listed in the keyfiles found under a folder.

    Every file named ``md5key_filename`` that ``locate`` finds below
    ``in_folder`` is read line by line. Each line is expected to follow
    this structure (fields separated by a single space):

        hashkey filepath

    ``filepath`` is resolved relative to the directory containing the
    keyfile. Lines that do not split into exactly two space-separated
    fields are skipped.

    @param in_folder: root folder to search for keyfiles in.
    @param md5key_filename: name of the file(s) holding the checksums.

    @return: list with the full path of every file whose checksum matched.
    @raise InvenioFileChecksumError: as soon as one computed checksum
        differs from the one listed in a keyfile.
    """
    validated_files = []
    for filename in locate(md5key_filename, root=in_folder):
        # Read everything up front so the keyfile is closed again before
        # the checksum computations start (and even if reading fails).
        with open(filename, 'r') as file_fd:
            contents = file_fd.readlines()
        keyfile_dir = os.path.dirname(filename)
        for line in contents:
            split_line = line.split(' ')
            if len(split_line) != 2:
                # Not a "hashkey filepath" line; ignore it.
                continue
            hashkey, hashkey_target = split_line
            hashkey_target = os.path.join(keyfile_dir,
                                          hashkey_target.strip())
            hashkey = hashkey.strip()
            found_hashkey = calculate_md5_external(hashkey_target).strip()
            if found_hashkey != hashkey:
                raise InvenioFileChecksumError(
                    "Error matching checksum of %s:"
                    " %s is not equal to %s" %
                    (hashkey_target, found_hashkey, hashkey))
            validated_files.append(hashkey_target)
    return validated_files
def find_and_validate_md5_checksums(in_folder, md5key_filename):
    """
    Check the MD5 checksums recorded in keyfiles located below a folder.

    ``locate`` is used to find every file called ``md5key_filename``
    under ``in_folder``. Each keyfile is expected to contain lines of
    the following structure, with a single space between the two fields:

        hashkey filepath

    The ``filepath`` part is interpreted relative to the directory the
    keyfile itself lives in. A line that does not split into exactly two
    space-separated fields is ignored.

    @param in_folder: root folder in which keyfiles are searched.
    @param md5key_filename: filename of the checksum keyfile(s).

    @return: list of full paths of the files that passed validation.
    @raise InvenioFileChecksumError: when a computed checksum does not
        equal the checksum given in the keyfile.
    """
    validated_files = []
    for filename in locate(md5key_filename, root=in_folder):
        # The context manager guarantees the keyfile handle is released,
        # including when a checksum mismatch raises further down.
        with open(filename, 'r') as file_fd:
            contents = file_fd.readlines()
        base_directory = os.path.dirname(filename)
        for line in contents:
            split_line = line.split(' ')
            if len(split_line) == 2:
                hashkey = split_line[0].strip()
                hashkey_target = os.path.join(base_directory,
                                              split_line[1].strip())
                found_hashkey = calculate_md5_external(
                    hashkey_target).strip()
                if found_hashkey != hashkey:
                    raise InvenioFileChecksumError(
                        "Error matching checksum of %s:"
                        " %s is not equal to %s" %
                        (hashkey_target, found_hashkey, hashkey))
                validated_files.append(hashkey_target)
    return validated_files
def test_md5_algorithms(self):
    """bibdocfile - compare md5 algorithms"""
    from invenio.bibdocfile import calculate_md5, \
        calculate_md5_external
    filepath = os.path.join(self.path, 'test.txt')
    # Close the file explicitly so its content is flushed to disk before
    # either implementation hashes it; relying on garbage collection to
    # close the handle is not guaranteed on every interpreter.
    with open(filepath, "w") as test_file:
        test_file.write("test")
    # The internal implementation and the external one must agree.
    self.assertEqual(calculate_md5(filepath, force_internal=True),
                     calculate_md5_external(filepath))
def test_md5_check(self):
    """
    Test md5 checking done by APS Harvester.

    Writes a small target file plus a keyfile listing the target's MD5
    checksum, then verifies that find_and_validate_md5_checksums reports
    exactly one validated file.
    """
    created_files = []
    try:
        # Create temporary file to test with
        hashtarget_filepath = get_temporary_file(directory="/tmp")
        created_files.append(hashtarget_filepath)
        with open(hashtarget_filepath, "w") as tmpfd:
            tmpfd.write("this is a test")
        filename = os.path.basename(hashtarget_filepath)
        hashtarget_md5 = calculate_md5_external(hashtarget_filepath)

        # Create a md5 keyfile looking like:
        # 54b0c58c7ce9f2a8b551351102ee0938 apsharvest_test_lFecZz
        md5_keyfile = get_temporary_file(directory="/tmp")
        created_files.append(md5_keyfile)
        with open(md5_keyfile, "w") as tmpfd:
            tmpfd.write("%s %s\n" % (hashtarget_md5, filename))

        dirpath, filename = os.path.split(md5_keyfile)
        res = find_and_validate_md5_checksums(in_folder=dirpath,
                                              md5key_filename=filename)
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(res), 1)
    finally:
        # Do not leave the fixtures behind, whatever the outcome was.
        for path in created_files:
            if os.path.exists(path):
                os.remove(path)
def test_md5_check(self):
    """
    Test md5 checking done by APS Harvester.

    A target file and a keyfile containing the target's MD5 checksum are
    created; find_and_validate_md5_checksums is then expected to return
    a single validated file.
    """
    temporary_paths = []
    try:
        # Create temporary file to test with
        hashtarget_filepath = get_temporary_file(directory="/tmp")
        temporary_paths.append(hashtarget_filepath)
        with open(hashtarget_filepath, 'w') as tmpfd:
            tmpfd.write("this is a test")
        target_name = os.path.basename(hashtarget_filepath)
        hashtarget_md5 = calculate_md5_external(hashtarget_filepath)

        # Create a md5 keyfile looking like:
        # 54b0c58c7ce9f2a8b551351102ee0938 apsharvest_test_lFecZz
        md5_keyfile = get_temporary_file(directory="/tmp")
        temporary_paths.append(md5_keyfile)
        with open(md5_keyfile, 'w') as tmpfd:
            tmpfd.write("%s %s\n" % (hashtarget_md5, target_name))

        dirpath, filename = os.path.split(md5_keyfile)
        res = find_and_validate_md5_checksums(in_folder=dirpath,
                                              md5key_filename=filename)
        # assertEqual shows the offending length on failure.
        self.assertEqual(len(res), 1)
    finally:
        # Remove the fixtures so repeated runs do not pile up in /tmp.
        for path in temporary_paths:
            if os.path.exists(path):
                os.remove(path)
def test_md5_algorithms(self):
    """bibdocfile - compare md5 algorithms"""
    filepath = os.path.join(self.path, "test.txt")
    # Write through a context manager so the handle is closed (and the
    # data flushed) before the checksums are computed.
    with open(filepath, "w") as test_file:
        test_file.write("test")
    # Both implementations must produce the same digest for the file.
    self.assertEqual(calculate_md5(filepath, force_internal=True),
                     calculate_md5_external(filepath))