Ejemplo n.º 1
0
def restructureForComplianceFileUUIDsAssigned(
        job,
        unit_path,
        unit_uuid,
        unit_type='sip_id',
        unit_path_replacement='%SIPDirectory%'):
    """Move a unit's loose files and directories under ``objects/``.

    The required directories are created first. Afterwards every top-level
    file that is not in ``OPTIONAL_FILES`` and every top-level directory that
    is not in ``REQUIRED_DIRECTORIES`` is relocated below
    ``<unit_path>/objects/`` with its relative layout preserved. Files are
    moved one by one through ``fileOperations.updateFileLocation2`` (which is
    expected to update the recorded location as well as the file on disk),
    and each relocated top-level directory is removed once it was walked.

    :param job: object whose ``pyprint`` method is used for progress output.
    :param unit_path: directory of the unit being restructured.
    :param unit_uuid: forwarded as ``unitIdentifier``.
    :param unit_type: forwarded as ``unitIdentifierType``
        (sipUUID or transferUUID).
    :param unit_path_replacement: forwarded as ``unitPathReplaceWith``.
    """
    # Create required directories
    archivematicaFunctions.create_directories(REQUIRED_DIRECTORIES,
                                              unit_path,
                                              printing=True,
                                              printfn=job.pyprint)
    unit_path = os.path.join(unit_path, '')  # Ensure both end with /
    objects_path = os.path.join(unit_path, 'objects', '')

    def to_objects_path(path):
        # Every path handled here starts with unit_path, so swap only that
        # leading occurrence. An unbounded replace() would also rewrite a
        # later repetition of the unit path inside the name (e.g. unit "/a/"
        # holding "b/a/c") and send the file to the wrong destination.
        return path.replace(unit_path, objects_path, 1)

    def move_to_objects(src):
        fileOperations.updateFileLocation2(
            src=src,
            dst=to_objects_path(src),
            unitPath=unit_path,
            unitIdentifier=unit_uuid,
            unitIdentifierType=unit_type,  # sipUUID or transferUUID
            unitPathReplaceWith=unit_path_replacement,
            printfn=job.pyprint)

    # Move everything else to the objects directory, updating DB with new path
    for entry in os.listdir(unit_path):
        entry_path = os.path.join(unit_path, entry)
        if os.path.isfile(entry_path) and entry not in OPTIONAL_FILES:
            move_to_objects(entry_path)
        elif os.path.isdir(entry_path) and entry not in REQUIRED_DIRECTORIES:
            # Make directory at new location if not exists
            entry_objects_path = to_objects_path(entry_path)
            if not os.path.exists(entry_objects_path):
                job.pyprint('Creating directory:', entry_objects_path)
                os.mkdir(entry_objects_path)
            # Walk and move to objects dir, preserving directory structure
            # and updating the DB
            for dirpath, dirnames, filenames in os.walk(entry_path):
                # Create children dirs in new location, otherwise move fails
                for dirname in dirnames:
                    create_dir = to_objects_path(
                        os.path.join(dirpath, dirname))
                    if not os.path.exists(create_dir):
                        job.pyprint('Creating directory:', create_dir)
                        os.makedirs(create_dir)
                # Move files
                for filename in filenames:
                    move_to_objects(os.path.join(dirpath, filename))
            # Only the (now file-less) directory skeleton is left behind
            job.pyprint('Removing directory', entry_path)
            shutil.rmtree(entry_path)
Ejemplo n.º 2
0
def restructureTRIMForComplianceFileUUIDsAssigned(
        unitPath,
        unitIdentifier,
        unitIdentifierType,
        unitPathReplaceWith="%transferDirectory%"):
    """Rearrange a TRIM transfer into the required directory layout.

    Missing required directories are created. Each other top-level
    directory has its direct children moved to ``objects/<directory>/`` and
    is then removed; each other top-level file is moved to ``metadata/``
    (``manifest.txt`` goes to ``metadata/submissionDocumentation``). Moves go
    through ``updateFileLocation2`` and the file group use of the metadata
    files is set along the way.

    :param unitPath: directory of the transfer being restructured.
    :param unitIdentifier: forwarded unchanged to the file helpers.
    :param unitIdentifierType: forwarded unchanged to the file helpers.
    :param unitPathReplaceWith: forwarded unchanged to the file helpers.
    """
    for requiredDir in requiredDirectories:
        reqDirPath = os.path.join(unitPath, requiredDir)
        if not os.path.isdir(reqDirPath):
            os.mkdir(reqDirPath)

    for item in os.listdir(unitPath):
        if item in requiredDirectories:
            continue
        src = os.path.join(unitPath, item)
        if os.path.isdir(src):
            objectsDir = os.path.join(unitPath, "objects", item)
            os.mkdir(objectsDir)
            for item2 in os.listdir(src):
                itemPath = os.path.join(src, item2)
                dst = os.path.join(objectsDir, item2)
                updateFileLocation2(itemPath, dst, unitPath, unitIdentifier,
                                    unitIdentifierType, unitPathReplaceWith)

                if item2.endswith("Metadata.xml"):
                    # Drop the "Metadata.xml" suffix together with the one
                    # character in front of it to get the name the metadata
                    # file shares with whatever it describes.
                    TRIMfileID = os.path.join(item,
                                              item2[:-1 - len("Metadata.xml")])
                    files = getFileUUIDLike('%' + TRIMfileID + '%', unitPath,
                                            unitIdentifier, unitIdentifierType,
                                            unitPathReplaceWith)
                    fileUUID = None
                    fileGrpUUID = None
                    # .items() instead of the Python 2-only .iteritems(), so
                    # the loop runs unchanged on Python 2 and 3.
                    for key, value in files.items():
                        if key.endswith("Metadata.xml"):
                            fileUUID = value
                        else:
                            fileGrpUUID = value
                    if fileUUID and fileGrpUUID:
                        fileGrpUse = "TRIM file metadata"
                        updateFileGrpUsefileGrpUUID(fileUUID, fileGrpUse,
                                                    fileGrpUUID)
                    elif fileUUID and not fileGrpUUID:
                        # Only the metadata file matched
                        updateFileGrpUse(fileUUID, "TRIM container metadata")
            os.removedirs(src)
        else:
            destDir = "metadata"
            if item == "manifest.txt":
                destDir = "metadata/submissionDocumentation"
            dst = os.path.join(unitPath, destDir, item)
            updateFileLocation2(src, dst, unitPath, unitIdentifier,
                                unitIdentifierType, unitPathReplaceWith)
            files = getFileUUIDLike(dst, unitPath, unitIdentifier,
                                    unitIdentifierType, unitPathReplaceWith)
            # Only the UUIDs are needed here
            for fileUUID in files.values():
                updateFileGrpUse(fileUUID, "TRIM metadata")
def restructureTRIMForComplianceFileUUIDsAssigned(
    unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith="%transferDirectory%"
):
    """Restructure a TRIM transfer so it matches the required layout.

    Steps, in order:

    1. Create any directory from ``requiredDirectories`` that is missing.
    2. For every other top-level directory, move its direct children to
       ``objects/<directory>/`` via ``updateFileLocation2``, set the file
       group use for each ``*Metadata.xml`` child, then remove the source
       directory.
    3. Move every other top-level file to ``metadata/`` (``manifest.txt`` to
       ``metadata/submissionDocumentation``) and mark it "TRIM metadata".

    :param unitPath: directory of the transfer being restructured.
    :param unitIdentifier: passed through to the file helpers.
    :param unitIdentifierType: passed through to the file helpers.
    :param unitPathReplaceWith: passed through to the file helpers.
    """
    for requiredName in requiredDirectories:
        reqDirPath = os.path.join(unitPath, requiredName)
        if not os.path.isdir(reqDirPath):
            os.mkdir(reqDirPath)

    for item in os.listdir(unitPath):
        if item in requiredDirectories:
            continue
        src = os.path.join(unitPath, item)

        if not os.path.isdir(src):
            destDir = "metadata"
            if item == "manifest.txt":
                destDir = "metadata/submissionDocumentation"
            dst = os.path.join(unitPath, destDir, item)
            updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            files = getFileUUIDLike(dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            # The keys are not needed; iterate the UUIDs directly.
            for fileUUID in files.values():
                updateFileGrpUse(fileUUID, "TRIM metadata")
            continue

        objectsDir = os.path.join(unitPath, "objects", item)
        os.mkdir(objectsDir)
        for item2 in os.listdir(src):
            itemPath = os.path.join(src, item2)
            dst = os.path.join(objectsDir, item2)
            updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)

            if not item2.endswith("Metadata.xml"):
                continue

            # Strip "Metadata.xml" plus the single character preceding it.
            TRIMfileID = os.path.join(item, item2[: -1 - len("Metadata.xml")])
            files = getFileUUIDLike(
                "%" + TRIMfileID + "%", unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith
            )
            fileUUID = None
            fileGrpUUID = None
            # dict.iteritems() exists on Python 2 only; items() iterates the
            # same pairs on both major versions.
            for key, value in files.items():
                if key.endswith("Metadata.xml"):
                    fileUUID = value
                else:
                    fileGrpUUID = value
            if fileUUID and fileGrpUUID:
                updateFileGrpUsefileGrpUUID(fileUUID, "TRIM file metadata", fileGrpUUID)
            elif fileUUID:
                # No companion entry matched, only the metadata file itself.
                updateFileGrpUse(fileUUID, "TRIM container metadata")
        os.removedirs(src)
Ejemplo n.º 4
0
def restructureTRIMForComplianceFileUUIDsAssigned(unitPath, unitIdentifier, unitIdentifierType="transfer", unitPathReplaceWith="%transferDirectory%"):
    """Restructure a TRIM transfer into the required directory layout.

    After the required directories are created, every other top-level
    directory has its direct children moved to ``objects/<directory>/`` and
    is removed, and every other top-level file is moved to ``metadata/``
    (``manifest.txt`` to ``metadata/submissionDocumentation``). All moves and
    file group use updates are delegated to ``fileOperations``.

    :param unitPath: directory of the transfer being restructured.
    :param unitIdentifier: passed through to ``fileOperations``.
    :param unitIdentifierType: passed through to ``fileOperations``.
    :param unitPathReplaceWith: passed through to ``fileOperations``.
    """
    archivematicaFunctions.create_directories(REQUIRED_DIRECTORIES, unitPath)

    # os.listdir() hands back names of the same type as its argument. The
    # names may hold arbitrary, non-Unicode characters, so list with a str.
    unitPath = str(unitPath)
    # Trailing arguments shared by every fileOperations lookup/move below.
    unitArgs = (unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
    metadataSuffix = "Metadata.xml"

    for entry in os.listdir(unitPath):
        if entry in REQUIRED_DIRECTORIES:
            continue
        entryPath = os.path.join(unitPath, entry)

        if not os.path.isdir(entryPath):
            # Loose top-level file: it belongs with the metadata.
            targetDir = "metadata/submissionDocumentation" if entry == "manifest.txt" else "metadata"
            newPath = os.path.join(unitPath, targetDir, entry)
            fileOperations.updateFileLocation2(entryPath, newPath, *unitArgs)
            matches = fileOperations.getFileUUIDLike(newPath, *unitArgs)
            for _name, matchUUID in matches.items():
                fileOperations.updateFileGrpUse(matchUUID, "TRIM metadata")
            continue

        # Top-level directory: its children go to objects/<entry>/.
        objectsSubdir = os.path.join(unitPath, "objects", entry)
        os.mkdir(objectsSubdir)
        for child in os.listdir(entryPath):
            fileOperations.updateFileLocation2(os.path.join(entryPath, child), os.path.join(objectsSubdir, child), *unitArgs)
            if not child.endswith(metadataSuffix):
                continue

            # Cut off the suffix and the one character in front of it.
            trimFileID = os.path.join(entry, child[:-1 - len(metadataSuffix)])
            matches = fileOperations.getFileUUIDLike('%' + trimFileID + '%', *unitArgs)
            metadataUUID = None
            describedUUID = None
            for name, matchUUID in matches.items():
                if name.endswith(metadataSuffix):
                    metadataUUID = matchUUID
                else:
                    describedUUID = matchUUID

            if not metadataUUID:
                continue
            if describedUUID:
                fileOperations.updateFileGrpUsefileGrpUUID(metadataUUID, "TRIM file metadata", describedUUID)
            else:
                fileOperations.updateFileGrpUse(metadataUUID, "TRIM container metadata")
        os.removedirs(entryPath)
def restructureBagForComplianceFileUUIDsAssigned(
    unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith="%transferDirectory%"
):
    """Restructure an unpacked bag into the required directory layout.

    Steps, in order:

    1. For each required directory (plus ``logs/BagIt``): move it up from
       ``data/`` if it exists there, otherwise create it when missing.
    2. Move top-level files: names starting with ``manifest`` go to
       ``metadata/``, everything else to ``logs/BagIt``.
    3. Move the remaining contents of ``data/`` to ``objects/`` (files listed
       in ``optionalFiles`` are left alone).
    4. Remove ``data/``, which must be empty by then.

    :param unitPath: directory of the transfer being restructured.
    :param unitIdentifier: passed through to the move helpers.
    :param unitIdentifierType: passed through to the move helpers.
    :param unitPathReplaceWith: passed through to the move helpers.
    :raises OSError: from ``os.rmdir`` if ``data/`` is not empty at the end.
    """
    bagFileDefaultDest = os.path.join(unitPath, "logs", "BagIt")
    # Extend a per-call copy. Appending to the shared module-level list would
    # leak this transfer's path into every later call in the same process.
    # NOTE(review): this entry already contains unitPath, unlike the other
    # names; when unitPath is absolute both joins in the loop below return it
    # unchanged for this entry. Confirm that is intended.
    bagRequiredDirectories = list(requiredDirectories) + [bagFileDefaultDest]
    unitDataPath = os.path.join(unitPath, "data")

    # A single parenthesised argument keeps each print valid both as a
    # Python 2 print statement and as a Python 3 function call.
    for requiredDir in bagRequiredDirectories:
        dirPath = os.path.join(unitPath, requiredDir)
        dirDataPath = os.path.join(unitPath, "data", requiredDir)
        if os.path.isdir(dirDataPath):
            # move to the top level
            updateDirectoryLocation(
                dirDataPath, dirPath, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith
            )
            print("moving directory  %s" % (requiredDir,))
        elif not os.path.isdir(dirPath):
            # Skip directories that already exist; makedirs also covers a
            # nested entry whose parent is missing.
            print("creating:  %s" % (requiredDir,))
            os.makedirs(dirPath)

    for item in os.listdir(unitPath):
        src = os.path.join(unitPath, item)
        if os.path.isfile(src):
            if item.startswith("manifest"):
                dst = os.path.join(unitPath, "metadata", item)
            else:
                dst = os.path.join(bagFileDefaultDest, item)
            updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)

    for item in os.listdir(unitDataPath):
        itemPath = os.path.join(unitDataPath, item)
        if os.path.isdir(itemPath) and item not in bagRequiredDirectories:
            print("moving directory to objects:  %s" % (item,))
            dst = os.path.join(unitPath, "objects", item)
            updateDirectoryLocation(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
        elif os.path.isfile(itemPath) and item not in optionalFiles:
            print("moving file to objects:  %s" % (item,))
            dst = os.path.join(unitPath, "objects", item)
            updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)

    print("removing empty data directory")
    os.rmdir(unitDataPath)
Ejemplo n.º 6
0
def restructureBagForComplianceFileUUIDsAssigned(job, unitPath, unitIdentifier, unitIdentifierType="transfer_id", unitPathReplaceWith="%transferDirectory%"):
    """Restructure an unpacked bag into the required directory layout.

    Required directories (plus ``logs/BagIt``) are moved up from ``data/``
    when present there, otherwise created if missing. Top-level files are
    then sorted: ``manifest*`` files go to ``metadata/``, files listed in
    ``OPTIONAL_FILES`` stay put, all others go to ``logs/BagIt``. Finally the
    remaining contents of ``data/`` are moved to ``objects/`` (optional files
    go to the top level instead) and ``data/`` itself is removed.

    :param job: object whose ``pyprint`` method is used for progress output.
    :param unitPath: directory of the transfer being restructured.
    :param unitIdentifier: passed through to ``fileOperations``.
    :param unitIdentifierType: passed through to ``fileOperations``.
    :param unitPathReplaceWith: passed through to ``fileOperations``.
    :raises OSError: from ``os.rmdir`` if ``data/`` is not empty at the end.
    """
    bagFileDefaultDest = os.path.join(unitPath, "logs", "BagIt")
    # Build a per-call sequence instead of appending to REQUIRED_DIRECTORIES:
    # the append grew the shared module-level value on every call (and is not
    # possible at all when the constant is a tuple).
    requiredDirs = list(REQUIRED_DIRECTORIES) + [bagFileDefaultDest]
    # This needs to be cast to a string since we're calling os.path.join(),
    # and any of the other arguments could contain arbitrary, non-Unicode
    # characters.
    unitPath = str(unitPath)
    unitDataPath = str(os.path.join(unitPath, "data"))
    for requiredDir in requiredDirs:
        dirPath = os.path.join(unitPath, requiredDir)
        dirDataPath = os.path.join(unitPath, "data", requiredDir)
        if os.path.isdir(dirDataPath):
            # move to the top level
            fileOperations.updateDirectoryLocation(dirDataPath, dirPath, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            job.pyprint("moving directory ", requiredDir)
        elif not os.path.isdir(dirPath):
            job.pyprint("creating: ", requiredDir)
            # makedirs so a nested entry can be created even when its parent
            # does not exist yet.
            os.makedirs(dirPath)
    for item in os.listdir(unitPath):
        src = os.path.join(unitPath, item)
        if os.path.isfile(src):
            if item.startswith("manifest"):
                dst = os.path.join(unitPath, "metadata", item)
                fileOperations.updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith, printfn=job.pyprint)
            elif item in OPTIONAL_FILES:
                job.pyprint("not moving:", item)
            else:
                dst = os.path.join(bagFileDefaultDest, item)
                fileOperations.updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith, printfn=job.pyprint)
    for item in os.listdir(unitDataPath):
        itemPath = os.path.join(unitDataPath, item)
        if os.path.isdir(itemPath) and item not in requiredDirs:
            job.pyprint("moving directory to objects: ", item)
            dst = os.path.join(unitPath, "objects", item)
            fileOperations.updateDirectoryLocation(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
        elif os.path.isfile(itemPath) and item not in OPTIONAL_FILES:
            job.pyprint("moving file to objects: ", item)
            dst = os.path.join(unitPath, "objects", item)
            fileOperations.updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith, printfn=job.pyprint)
        elif item in OPTIONAL_FILES:
            # Optional files found in data/ are lifted to the top level.
            dst = os.path.join(unitPath, item)
            fileOperations.updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith, printfn=job.pyprint)
    job.pyprint("removing empty data directory")
    os.rmdir(unitDataPath)
def restructureBagForComplianceFileUUIDsAssigned(unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith = "%transferDirectory%"):
    """Restructure an unpacked bag into the required directory layout.

    Required directories (plus ``logs/BagIt``) are moved up from ``data/``
    when present there, otherwise created if missing. Top-level files whose
    name starts with ``manifest`` are moved to ``metadata/`` and all other
    top-level files to ``logs/BagIt``. The remaining contents of ``data/``
    are moved to ``objects/`` (files in ``optionalFiles`` are skipped), and
    ``data/`` is removed at the end.

    :param unitPath: directory of the transfer being restructured.
    :param unitIdentifier: passed through to the move helpers.
    :param unitIdentifierType: passed through to the move helpers.
    :param unitPathReplaceWith: passed through to the move helpers.
    :raises OSError: from ``os.rmdir`` if ``data/`` is not empty at the end.
    """
    bagFileDefaultDest = os.path.join(unitPath, "logs", "BagIt")
    # Do not append to the module-level requiredDirectories: that list is
    # shared, so each call would permanently add another transfer's path.
    localRequiredDirectories = list(requiredDirectories)
    localRequiredDirectories.append(bagFileDefaultDest)
    unitDataPath = os.path.join(unitPath, "data")
    for requiredDir in localRequiredDirectories:
        dirPath = os.path.join(unitPath, requiredDir)
        dirDataPath = os.path.join(unitPath, "data", requiredDir)
        if os.path.isdir(dirDataPath):
            #move to the top level
            src = dirDataPath
            dst = dirPath
            updateDirectoryLocation(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
            # Single-argument print(...) parses on Python 2 and Python 3.
            print("moving directory  %s" % (requiredDir,))
        else:
            # Creating a directory that is already there would raise.
            if not os.path.isdir(dirPath):
                print("creating:  %s" % (requiredDir,))
                os.makedirs(dirPath)
    for item in os.listdir(unitPath):
        src = os.path.join(unitPath, item)
        if os.path.isfile(src):
            if item.startswith("manifest"):
                dst = os.path.join(unitPath, "metadata", item)
            else:
                dst = os.path.join(bagFileDefaultDest, item)
            updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
    for item in os.listdir(unitDataPath):
        itemPath = os.path.join(unitDataPath, item)
        if os.path.isdir(itemPath) and item not in localRequiredDirectories:
            print("moving directory to objects:  %s" % (item,))
            dst = os.path.join(unitPath, "objects", item)
            updateDirectoryLocation(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
        elif os.path.isfile(itemPath) and item not in optionalFiles:
            print("moving file to objects:  %s" % (item,))
            dst = os.path.join(unitPath, "objects", item)
            updateFileLocation2(itemPath, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith)
    print("removing empty data directory")
    os.rmdir(unitDataPath)
def restructureBagForComplianceFileUUIDsAssigned(
    job,
    unitPath,
    unitIdentifier,
    unitIdentifierType="transfer_id",
    unitPathReplaceWith="%transferDirectory%",
):
    """Restructure an unpacked bag into the required directory layout.

    Steps, in order:

    1. For each required directory (plus ``logs/BagIt``): if it exists under
       ``data/`` it is moved to the top level (an existing top-level
       ``metadata`` is first handed to ``move_or_merge``); otherwise it is
       created when missing.
    2. Top-level files: ``manifest*`` go to ``metadata/``, files listed in
       ``OPTIONAL_FILES`` stay, all others go to ``logs/BagIt``.
    3. Remaining contents of ``data/`` go to ``objects/``; optional files go
       to the top level instead.
    4. ``data/`` is removed.

    :param job: object whose ``pyprint`` method is used for progress output.
    :param unitPath: directory of the transfer being restructured.
    :param unitIdentifier: passed through to ``fileOperations``.
    :param unitIdentifierType: passed through to ``fileOperations``.
    :param unitPathReplaceWith: passed through to ``fileOperations``.
    """
    bagFileDefaultDest = os.path.join(unitPath, "logs", "BagIt")
    bag_required_dirs = REQUIRED_DIRECTORIES + (bagFileDefaultDest, )
    # os.path.join() is used on these below together with names that may
    # contain arbitrary, non-Unicode characters, hence the str() casts.
    unitPath = str(unitPath)
    unitDataPath = str(os.path.join(unitPath, "data"))

    def relocate_directory(src, dst):
        # Directory move with the unit arguments filled in.
        fileOperations.updateDirectoryLocation(
            src, dst, unitPath, unitIdentifier, unitIdentifierType,
            unitPathReplaceWith,
        )

    def relocate_file(src, dst):
        # File move with the unit arguments filled in, reporting via the job.
        fileOperations.updateFileLocation2(
            src, dst, unitPath, unitIdentifier, unitIdentifierType,
            unitPathReplaceWith, printfn=job.pyprint,
        )

    for required in bag_required_dirs:
        top_level = os.path.join(unitPath, required)
        under_data = os.path.join(unitPath, "data", required)

        if not os.path.isdir(under_data):
            # Nothing to lift out of data/; just make sure it exists.
            if not os.path.isdir(top_level):
                job.pyprint("creating: ", required)
                os.makedirs(top_level)
            continue

        if required == "metadata" and os.path.isdir(top_level):
            # Fold the existing top-level metadata folder into the one under
            # data/ first, so the move up below carries both.
            job.pyprint("{}: moving/merging {} to {}".format(
                required, top_level, under_data))
            move_or_merge(top_level, under_data)

        relocate_directory(under_data, top_level)
        job.pyprint("moving directory ", required)

    for name in os.listdir(unitPath):
        path = os.path.join(unitPath, name)
        if not os.path.isfile(path):
            continue
        if name.startswith("manifest"):
            relocate_file(path, os.path.join(unitPath, "metadata", name))
        elif name in OPTIONAL_FILES:
            job.pyprint("not moving:", name)
        else:
            relocate_file(path, os.path.join(bagFileDefaultDest, name))

    for name in os.listdir(unitDataPath):
        path = os.path.join(unitDataPath, name)
        if os.path.isdir(path) and name not in bag_required_dirs:
            job.pyprint("moving directory to objects: ", name)
            relocate_directory(path, os.path.join(unitPath, "objects", name))
        elif os.path.isfile(path) and name not in OPTIONAL_FILES:
            job.pyprint("moving file to objects: ", name)
            relocate_file(path, os.path.join(unitPath, "objects", name))
        elif name in OPTIONAL_FILES:
            relocate_file(path, os.path.join(unitPath, name))

    job.pyprint("removing empty data directory")
    os.rmdir(unitDataPath)