def restructureForComplianceFileUUIDsAssigned(
        job, unit_path, unit_uuid, unit_type='sip_id',
        unit_path_replacement='%SIPDirectory%'):
    """Move a unit's loose files and extra directories under ``objects/``.

    Every regular file directly inside ``unit_path`` that is not listed in
    OPTIONAL_FILES, and every directory that is not listed in
    REQUIRED_DIRECTORIES, is relocated below ``<unit_path>/objects/`` with
    its relative layout preserved.  Each file goes through
    ``fileOperations.updateFileLocation2`` so the stored location follows
    the move; emptied source directories are removed afterwards.

    :param job: object whose ``pyprint`` method is used for progress output.
    :param unit_path: directory of the unit being restructured.
    :param unit_uuid: identifier forwarded as ``unitIdentifier``.
    :param unit_type: forwarded as ``unitIdentifierType``.
    :param unit_path_replacement: forwarded as ``unitPathReplaceWith``.
    """
    # Create required directories
    archivematicaFunctions.create_directories(
        REQUIRED_DIRECTORIES, unit_path, printing=True, printfn=job.pyprint)
    unit_path = os.path.join(unit_path, '')  # Ensure both end with /
    objects_path = os.path.join(unit_path, 'objects', '')

    def to_objects(path):
        # Rewrite only the leading unit_path prefix.  An unbounded replace()
        # would also rewrite any later occurrence of the same string inside
        # the path (e.g. unit '/a/' and file '/a/b/a/c') and produce a bogus
        # destination.
        return path.replace(unit_path, objects_path, 1)

    # Move everything else to the objects directory, updating DB with new path
    for entry in os.listdir(unit_path):
        entry_path = os.path.join(unit_path, entry)
        if os.path.isfile(entry_path) and entry not in OPTIONAL_FILES:
            # Move to objects
            src = os.path.join(unit_path, entry)
            dst = os.path.join(objects_path, entry)
            fileOperations.updateFileLocation2(
                src=src,
                dst=dst,
                unitPath=unit_path,
                unitIdentifier=unit_uuid,
                unitIdentifierType=unit_type,  # sipUUID or transferUUID
                unitPathReplaceWith=unit_path_replacement,
                printfn=job.pyprint)
        if os.path.isdir(entry_path) and entry not in REQUIRED_DIRECTORIES:
            # Make directory at new location if not exists
            entry_objects_path = to_objects(entry_path)
            if not os.path.exists(entry_objects_path):
                job.pyprint('Creating directory:', entry_objects_path)
                os.mkdir(entry_objects_path)
            # Walk and move to objects dir, preserving directory structure
            # and updating the DB
            for dirpath, dirnames, filenames in os.walk(entry_path):
                # Create children dirs in new location, otherwise move fails
                for dirname in dirnames:
                    create_dir = to_objects(os.path.join(dirpath, dirname))
                    if not os.path.exists(create_dir):
                        job.pyprint('Creating directory:', create_dir)
                        os.makedirs(create_dir)
                # Move files
                for filename in filenames:
                    src = os.path.join(dirpath, filename)
                    dst = to_objects(src)
                    fileOperations.updateFileLocation2(
                        src=src,
                        dst=dst,
                        unitPath=unit_path,
                        unitIdentifier=unit_uuid,
                        unitIdentifierType=unit_type,  # sipUUID or transferUUID
                        unitPathReplaceWith=unit_path_replacement,
                        printfn=job.pyprint)
            # Delete entry_path; its files were moved out by the walk above
            job.pyprint('Removing directory', entry_path)
            shutil.rmtree(entry_path)
def restructureTRIMForComplianceFileUUIDsAssigned(
        unitPath, unitIdentifier, unitIdentifierType,
        unitPathReplaceWith="%transferDirectory%"):
    """Rearrange a TRIM unit into the required directory layout.

    Missing required directories are created first.  Each remaining
    top-level directory is then rebuilt under ``objects/`` one child at a
    time, and each remaining top-level file is moved into ``metadata``
    (``manifest.txt`` goes to ``metadata/submissionDocumentation``).  Files
    ending in ``Metadata.xml`` get their file-group use updated from the
    entries returned by ``getFileUUIDLike``.
    """
    metadata_suffix = "Metadata.xml"

    for required in requiredDirectories:
        required_path = os.path.join(unitPath, required)
        if os.path.isdir(required_path):
            continue
        os.mkdir(required_path)

    for entry in os.listdir(unitPath):
        if entry in requiredDirectories:
            continue
        entry_path = os.path.join(unitPath, entry)

        if not os.path.isdir(entry_path):
            # Loose top-level file: it belongs with the metadata.
            if entry == "manifest.txt":
                target_dir = "metadata/submissionDocumentation"
            else:
                target_dir = "metadata"
            new_path = os.path.join(unitPath, target_dir, entry)
            updateFileLocation2(entry_path, new_path, unitPath,
                                unitIdentifier, unitIdentifierType,
                                unitPathReplaceWith)
            matches = getFileUUIDLike(new_path, unitPath, unitIdentifier,
                                      unitIdentifierType, unitPathReplaceWith)
            for _path, matched_uuid in matches.iteritems():
                updateFileGrpUse(matched_uuid, "TRIM metadata")
            continue

        # Directory: recreate it under objects/ and move each child across.
        container_dest = os.path.join(unitPath, "objects", entry)
        os.mkdir(container_dest)
        for child in os.listdir(entry_path):
            child_src = os.path.join(entry_path, child)
            child_dst = os.path.join(container_dest, child)
            updateFileLocation2(child_src, child_dst, unitPath,
                                unitIdentifier, unitIdentifierType,
                                unitPathReplaceWith)
            if not child.endswith(metadata_suffix):
                continue

            # Drop the suffix plus the single character in front of it.
            trim_id = os.path.join(entry, child[:-1 - len(metadata_suffix)])
            matches = getFileUUIDLike('%' + trim_id + '%', unitPath,
                                      unitIdentifier, unitIdentifierType,
                                      unitPathReplaceWith)
            metadata_uuid = None
            described_uuid = None
            for matched_path, matched_uuid in matches.iteritems():
                if matched_path.endswith(metadata_suffix):
                    metadata_uuid = matched_uuid
                else:
                    described_uuid = matched_uuid

            if not metadata_uuid:
                continue
            if described_uuid:
                updateFileGrpUsefileGrpUUID(
                    metadata_uuid, "TRIM file metadata", described_uuid)
            else:
                updateFileGrpUse(metadata_uuid, "TRIM container metadata")
        os.removedirs(entry_path)
def restructureTRIMForComplianceFileUUIDsAssigned(
    unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith="%transferDirectory%"
):
    """Restructure a TRIM unit so it matches the required directory layout.

    Steps, in order:

    1. create any required directory that does not exist yet;
    2. for each other top-level directory, create its twin under
       ``objects/``, move every child over, tag ``*Metadata.xml`` children
       via the file-group helpers, then remove the emptied source;
    3. for each other top-level file, move it into ``metadata``
       (``manifest.txt`` into ``metadata/submissionDocumentation``) and tag
       every entry returned for its new path as "TRIM metadata".
    """
    # Arguments shared by every location/lookup helper call below.
    unit_args = (unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)

    for wanted in requiredDirectories:
        wanted_path = os.path.join(unitPath, wanted)
        if not os.path.isdir(wanted_path):
            os.mkdir(wanted_path)

    for top_name in os.listdir(unitPath):
        if top_name in requiredDirectories:
            continue
        top_path = os.path.join(unitPath, top_name)
        if os.path.isdir(top_path):
            moved_root = os.path.join(unitPath, "objects", top_name)
            os.mkdir(moved_root)
            for leaf in os.listdir(top_path):
                updateFileLocation2(
                    os.path.join(top_path, leaf), os.path.join(moved_root, leaf), *unit_args
                )
                if leaf.endswith("Metadata.xml"):
                    # Strip "Metadata.xml" and the one character preceding it.
                    stem = leaf[: -1 - len("Metadata.xml")]
                    pattern = "%" + os.path.join(top_name, stem) + "%"
                    found = getFileUUIDLike(pattern, *unit_args)
                    xml_uuid = None
                    group_uuid = None
                    for found_path, found_uuid in found.iteritems():
                        if found_path.endswith("Metadata.xml"):
                            xml_uuid = found_uuid
                        else:
                            group_uuid = found_uuid
                    if xml_uuid and group_uuid:
                        updateFileGrpUsefileGrpUUID(xml_uuid, "TRIM file metadata", group_uuid)
                    elif xml_uuid and not group_uuid:
                        updateFileGrpUse(xml_uuid, "TRIM container metadata")
            os.removedirs(top_path)
        else:
            subdir = "metadata"
            if top_name == "manifest.txt":
                subdir = "metadata/submissionDocumentation"
            moved_path = os.path.join(unitPath, subdir, top_name)
            updateFileLocation2(top_path, moved_path, *unit_args)
            found = getFileUUIDLike(moved_path, *unit_args)
            for found_path, found_uuid in found.iteritems():
                updateFileGrpUse(found_uuid, "TRIM metadata")
def restructureTRIMForComplianceFileUUIDsAssigned(
        unitPath, unitIdentifier, unitIdentifierType="transfer",
        unitPathReplaceWith="%transferDirectory%"):
    """Rearrange a TRIM unit into the required directory layout.

    Required directories are created through
    ``archivematicaFunctions.create_directories``.  Every other top-level
    directory is rebuilt under ``objects/`` child by child, and every other
    top-level file is moved into ``metadata`` (``manifest.txt`` goes to
    ``metadata/submissionDocumentation``).  Files whose name ends in
    ``Metadata.xml`` have their file-group use set from the entries
    returned by ``fileOperations.getFileUUIDLike``.
    """
    metadata_suffix = "Metadata.xml"

    # Create required directories
    archivematicaFunctions.create_directories(REQUIRED_DIRECTORIES, unitPath)

    # os.listdir() returns names of the same type as its argument.  Names may
    # contain arbitrary, non-Unicode characters, so the path is forced to
    # ``str`` to get bytestring names back.
    unitPath = str(unitPath)

    for entry in os.listdir(unitPath):
        if entry in REQUIRED_DIRECTORIES:
            continue
        entry_path = os.path.join(unitPath, entry)

        if not os.path.isdir(entry_path):
            # Loose top-level file: it belongs with the metadata.
            if entry == "manifest.txt":
                target_dir = "metadata/submissionDocumentation"
            else:
                target_dir = "metadata"
            new_path = os.path.join(unitPath, target_dir, entry)
            fileOperations.updateFileLocation2(
                entry_path, new_path, unitPath, unitIdentifier,
                unitIdentifierType, unitPathReplaceWith)
            matches = fileOperations.getFileUUIDLike(
                new_path, unitPath, unitIdentifier, unitIdentifierType,
                unitPathReplaceWith)
            for _path, matched_uuid in matches.items():
                fileOperations.updateFileGrpUse(matched_uuid, "TRIM metadata")
            continue

        # Directory: recreate it under objects/ and move each child across.
        container_dest = os.path.join(unitPath, "objects", entry)
        os.mkdir(container_dest)
        for child in os.listdir(entry_path):
            child_src = os.path.join(entry_path, child)
            child_dst = os.path.join(container_dest, child)
            fileOperations.updateFileLocation2(
                child_src, child_dst, unitPath, unitIdentifier,
                unitIdentifierType, unitPathReplaceWith)
            if not child.endswith(metadata_suffix):
                continue

            # Drop the suffix plus the single character in front of it.
            trim_id = os.path.join(entry, child[:-1 - len(metadata_suffix)])
            matches = fileOperations.getFileUUIDLike(
                '%' + trim_id + '%', unitPath, unitIdentifier,
                unitIdentifierType, unitPathReplaceWith)
            metadata_uuid = None
            described_uuid = None
            for matched_path, matched_uuid in matches.items():
                if matched_path.endswith(metadata_suffix):
                    metadata_uuid = matched_uuid
                else:
                    described_uuid = matched_uuid

            if not metadata_uuid:
                continue
            if described_uuid:
                fileOperations.updateFileGrpUsefileGrpUUID(
                    metadata_uuid, "TRIM file metadata", described_uuid)
            else:
                fileOperations.updateFileGrpUse(
                    metadata_uuid, "TRIM container metadata")
        os.removedirs(entry_path)
def restructureBagForComplianceFileUUIDsAssigned(
    unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith="%transferDirectory%"
):
    """Flatten a bag (``<unit>/data/...``) into the required unit layout.

    1. Each required directory found under ``data/`` is moved to the top
       level; otherwise it is created there if it does not exist yet.
    2. Top-level files are moved: names starting with ``manifest`` go to
       ``metadata``, everything else to ``logs/BagIt``.
    3. Remaining directories and files under ``data/`` are moved into
       ``objects`` (files listed in ``optionalFiles`` are left alone).
    4. The ``data`` directory is removed; ``os.rmdir`` raises if it is not
       empty.

    The unit arguments are forwarded unchanged to ``updateDirectoryLocation``
    and ``updateFileLocation2``.
    """
    bagFileDefaultDest = os.path.join(unitPath, "logs", "BagIt")
    # Use a per-call list.  Appending to the shared module-level
    # requiredDirectories would leave this unit's absolute path in it for
    # every later call (and for any other function that reads the list).
    required = list(requiredDirectories) + [bagFileDefaultDest]
    unitDataPath = os.path.join(unitPath, "data")

    # NOTE: the print calls take one pre-joined string so the output is the
    # same under Python 2's print statement and Python 3's print function.
    for name in required:
        dirPath = os.path.join(unitPath, name)
        dirDataPath = os.path.join(unitPath, "data", name)
        if os.path.isdir(dirDataPath):
            # move to the top level
            updateDirectoryLocation(
                dirDataPath, dirPath, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith
            )
            print("moving directory  " + name)
        elif not os.path.isdir(dirPath):
            # Skip directories that already exist; os.mkdir would raise.
            print("creating:  " + name)
            os.mkdir(dirPath)

    for item in os.listdir(unitPath):
        src = os.path.join(unitPath, item)
        if os.path.isfile(src):
            if item.startswith("manifest"):
                dst = os.path.join(unitPath, "metadata", item)
            else:
                dst = os.path.join(bagFileDefaultDest, item)
            updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)

    for item in os.listdir(unitDataPath):
        itemPath = os.path.join(unitDataPath, item)
        if os.path.isdir(itemPath) and item not in required:
            print("moving directory to objects:  " + item)
            dst = os.path.join(unitPath, "objects", item)
            updateDirectoryLocation(
                itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith
            )
        elif os.path.isfile(itemPath) and item not in optionalFiles:
            print("moving file to objects:  " + item)
            dst = os.path.join(unitPath, "objects", item)
            updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)

    print("removing empty data directory")
    os.rmdir(unitDataPath)
def restructureBagForComplianceFileUUIDsAssigned(
        job, unitPath, unitIdentifier, unitIdentifierType="transfer_id",
        unitPathReplaceWith="%transferDirectory%"):
    """Flatten a bag (``<unit>/data/...``) into the required unit layout.

    1. Each required directory found under ``data/`` is moved to the top
       level; otherwise it is created there if it does not exist yet.
    2. Top-level files are moved: names starting with ``manifest`` go to
       ``metadata``, names in OPTIONAL_FILES stay put, everything else goes
       to ``logs/BagIt``.
    3. Remaining entries under ``data/`` are moved: directories and
       ordinary files into ``objects``, OPTIONAL_FILES to the top level.
    4. The ``data`` directory is removed; ``os.rmdir`` raises if it is not
       empty.

    :param job: object whose ``pyprint`` method is used for progress output.
    :param unitPath: directory of the unit being restructured.
    :param unitIdentifier: forwarded to the ``fileOperations`` helpers.
    :param unitIdentifierType: forwarded to the ``fileOperations`` helpers.
    :param unitPathReplaceWith: forwarded to the ``fileOperations`` helpers.
    """
    bagFileDefaultDest = os.path.join(unitPath, "logs", "BagIt")
    # Use a per-call list.  Appending to the shared module-level
    # REQUIRED_DIRECTORIES would leave this unit's absolute path in it for
    # every later call (and for any other function that reads it).
    required = list(REQUIRED_DIRECTORIES) + [bagFileDefaultDest]
    # This needs to be cast to a string since we're calling os.path.join(),
    # and any of the other arguments could contain arbitrary, non-Unicode
    # characters.
    unitPath = str(unitPath)
    unitDataPath = str(os.path.join(unitPath, "data"))

    for dir in required:
        dirPath = os.path.join(unitPath, dir)
        dirDataPath = os.path.join(unitPath, "data", dir)
        if os.path.isdir(dirDataPath):
            # move to the top level
            src = dirDataPath
            dst = dirPath
            fileOperations.updateDirectoryLocation(
                src, dst, unitPath, unitIdentifier, unitIdentifierType,
                unitPathReplaceWith)
            job.pyprint("moving directory ", dir)
        else:
            if not os.path.isdir(dirPath):
                job.pyprint("creating: ", dir)
                os.mkdir(dirPath)

    for item in os.listdir(unitPath):
        src = os.path.join(unitPath, item)
        if os.path.isfile(src):
            if item.startswith("manifest"):
                dst = os.path.join(unitPath, "metadata", item)
                fileOperations.updateFileLocation2(
                    src, dst, unitPath, unitIdentifier, unitIdentifierType,
                    unitPathReplaceWith, printfn=job.pyprint)
            elif item in OPTIONAL_FILES:
                job.pyprint("not moving:", item)
            else:
                dst = os.path.join(bagFileDefaultDest, item)
                fileOperations.updateFileLocation2(
                    src, dst, unitPath, unitIdentifier, unitIdentifierType,
                    unitPathReplaceWith, printfn=job.pyprint)

    for item in os.listdir(unitDataPath):
        itemPath = os.path.join(unitDataPath, item)
        if os.path.isdir(itemPath) and item not in required:
            job.pyprint("moving directory to objects: ", item)
            dst = os.path.join(unitPath, "objects", item)
            fileOperations.updateDirectoryLocation(
                itemPath, dst, unitPath, unitIdentifier, unitIdentifierType,
                unitPathReplaceWith)
        elif os.path.isfile(itemPath) and item not in OPTIONAL_FILES:
            job.pyprint("moving file to objects: ", item)
            dst = os.path.join(unitPath, "objects", item)
            fileOperations.updateFileLocation2(
                itemPath, dst, unitPath, unitIdentifier, unitIdentifierType,
                unitPathReplaceWith, printfn=job.pyprint)
        elif item in OPTIONAL_FILES:
            # Optional files keep their name but move up out of data/.
            dst = os.path.join(unitPath, item)
            fileOperations.updateFileLocation2(
                itemPath, dst, unitPath, unitIdentifier, unitIdentifierType,
                unitPathReplaceWith, printfn=job.pyprint)

    job.pyprint("removing empty data directory")
    os.rmdir(unitDataPath)
def restructureBagForComplianceFileUUIDsAssigned(unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith = "%transferDirectory%"):
    """Flatten a bag (``<unit>/data/...``) into the required unit layout.

    Required directories found under ``data/`` are moved to the top level
    (or created there when missing).  Top-level files are then moved to
    ``metadata`` (names starting with ``manifest``) or ``logs/BagIt``
    (everything else).  Whatever remains under ``data/`` is moved into
    ``objects``, except files listed in ``optionalFiles``.  Finally
    ``data`` is removed with ``os.rmdir``, which raises if it is not empty.

    The unit arguments are forwarded unchanged to ``updateDirectoryLocation``
    and ``updateFileLocation2``.
    """
    bagFileDefaultDest = os.path.join(unitPath, "logs", "BagIt")
    # Build the directory list locally instead of appending to the
    # module-level requiredDirectories: the append accumulated one stale,
    # unit-specific absolute path in the shared list per call.
    neededDirectories = list(requiredDirectories)
    neededDirectories.append(bagFileDefaultDest)
    unitDataPath = os.path.join(unitPath, "data")

    # NOTE: each print call takes one pre-joined string so the output is
    # identical under Python 2's print statement and Python 3's print
    # function.
    for dirName in neededDirectories:
        dirPath = os.path.join(unitPath, dirName)
        dirDataPath = os.path.join(unitPath, "data", dirName)
        if os.path.isdir(dirDataPath):
            # move to the top level
            src = dirDataPath
            dst = dirPath
            updateDirectoryLocation(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            print("moving directory  " + dirName)
        elif not os.path.isdir(dirPath):
            # Only create what is missing; os.mkdir raises on an existing
            # directory.
            print("creating:  " + dirName)
            os.mkdir(dirPath)

    for item in os.listdir(unitPath):
        src = os.path.join(unitPath, item)
        if os.path.isfile(src):
            if item.startswith("manifest"):
                dst = os.path.join(unitPath, "metadata", item)
            else:
                dst = os.path.join(bagFileDefaultDest, item)
            updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)

    for item in os.listdir(unitDataPath):
        itemPath = os.path.join(unitDataPath, item)
        if os.path.isdir(itemPath) and item not in neededDirectories:
            print("moving directory to objects:  " + item)
            dst = os.path.join(unitPath, "objects", item)
            updateDirectoryLocation(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
        elif os.path.isfile(itemPath) and item not in optionalFiles:
            print("moving file to objects:  " + item)
            dst = os.path.join(unitPath, "objects", item)
            updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)

    print("removing empty data directory")
    os.rmdir(unitDataPath)
def restructureBagForComplianceFileUUIDsAssigned(
    job,
    unitPath,
    unitIdentifier,
    unitIdentifierType="transfer_id",
    unitPathReplaceWith="%transferDirectory%",
):
    """Flatten a bag (``<unit>/data/...``) into the required unit layout.

    Required directories are promoted from ``data/`` to the top level or
    created when absent; an existing top-level ``metadata`` directory is
    first folded into ``data/metadata`` via ``move_or_merge``.  Top-level
    files are then routed to ``metadata`` (``manifest*``), left in place
    (OPTIONAL_FILES) or sent to ``logs/BagIt``.  What remains under
    ``data/`` goes to ``objects`` (OPTIONAL_FILES go to the top level), and
    ``data`` is removed with ``os.rmdir``.

    ``job.pyprint`` receives all progress output; the unit arguments are
    forwarded unchanged to the ``fileOperations`` helpers.
    """
    bagFileDefaultDest = os.path.join(unitPath, "logs", "BagIt")
    wanted_dirs = REQUIRED_DIRECTORIES + (bagFileDefaultDest,)
    # os.path.join() is called with names that may hold arbitrary,
    # non-Unicode characters, so the base path is cast to a string.
    unitPath = str(unitPath)
    unitDataPath = str(os.path.join(unitPath, "data"))

    def relocate_file(origin, target):
        # Every file move in this function shares the same unit arguments.
        fileOperations.updateFileLocation2(
            origin,
            target,
            unitPath,
            unitIdentifier,
            unitIdentifierType,
            unitPathReplaceWith,
            printfn=job.pyprint,
        )

    def relocate_dir(origin, target):
        fileOperations.updateDirectoryLocation(
            origin,
            target,
            unitPath,
            unitIdentifier,
            unitIdentifierType,
            unitPathReplaceWith,
        )

    for wanted in wanted_dirs:
        top_level = os.path.join(unitPath, wanted)
        in_data = os.path.join(unitPath, "data", wanted)
        if not os.path.isdir(in_data):
            if not os.path.isdir(top_level):
                job.pyprint("creating: ", wanted)
                os.makedirs(top_level)
            continue
        if wanted == "metadata" and os.path.isdir(top_level):
            # Fold the existing top-level metadata folder into data/metadata
            # first, so the promotion below carries both sets of content.
            job.pyprint("{}: moving/merging {} to {}".format(
                wanted, top_level, in_data))
            move_or_merge(top_level, in_data)
        # move to the top level
        relocate_dir(in_data, top_level)
        job.pyprint("moving directory ", wanted)

    for name in os.listdir(unitPath):
        candidate = os.path.join(unitPath, name)
        if not os.path.isfile(candidate):
            continue
        if name.startswith("manifest"):
            relocate_file(candidate, os.path.join(unitPath, "metadata", name))
        elif name in OPTIONAL_FILES:
            job.pyprint("not moving:", name)
        else:
            relocate_file(candidate, os.path.join(bagFileDefaultDest, name))

    for name in os.listdir(unitDataPath):
        candidate = os.path.join(unitDataPath, name)
        if os.path.isdir(candidate) and name not in wanted_dirs:
            job.pyprint("moving directory to objects: ", name)
            relocate_dir(candidate, os.path.join(unitPath, "objects", name))
        elif os.path.isfile(candidate) and name not in OPTIONAL_FILES:
            job.pyprint("moving file to objects: ", name)
            relocate_file(candidate, os.path.join(unitPath, "objects", name))
        elif name in OPTIONAL_FILES:
            # Optional files keep their name but move up out of data/.
            relocate_file(candidate, os.path.join(unitPath, name))

    job.pyprint("removing empty data directory")
    os.rmdir(unitDataPath)