def verifyMetsFileSecChecksums(metsFile, date, taskUUID, transferDirectory, transferUUID, relativeDirectory="./"):
    """Move the files of every LICENSE fileGrp in a METS file into DspaceLicenses.

    The METS document is parsed, and for each ``fileGrp`` whose ``USE``
    attribute is ``"LICENSE"`` every ``file/FLocat`` child is moved (via the
    ``rename`` helper) into
    ``<transferDirectory>/metadata/submissionDocumentation/DspaceLicenses``.
    Each move is then reported through ``updateFileLocation`` as a
    "movement" event.

    :param metsFile: path of the METS XML file to read.
    :param date: passed through to ``updateFileLocation``.
    :param taskUUID: accepted for interface compatibility; not used here.
    :param transferDirectory: root directory of the transfer; it is replaced
        by the ``%transferDirectory%`` placeholder in the recorded paths.
    :param transferUUID: passed through to ``updateFileLocation``.
    :param relativeDirectory: directory the ``xlink:href`` values are joined to.
    :returns: ``exitCode``, which this function never changes from 0.
    """
    print(metsFile)
    DspaceLicenses = "metadata/submissionDocumentation/DspaceLicenses"
    mets_ns = "{http://www.loc.gov/METS/}"
    href_attr = "{http://www.w3.org/1999/xlink}href"

    licenses_path = os.path.join(transferDirectory, DspaceLicenses)
    try:
        if not os.path.isdir(licenses_path):
            os.mkdir(licenses_path)
    except OSError:
        # Best effort: only filesystem errors are reported and tolerated here.
        # Anything else (e.g. KeyboardInterrupt) must not be swallowed.
        print("error creating DspaceLicenses directory.")
    exitCode = 0

    root = etree.parse(metsFile).getroot()
    for file_group in root.findall(mets_ns + "fileSec/" + mets_ns + "fileGrp"):
        if file_group.get("USE") != "LICENSE":
            continue
        # Direct <file> children and their direct <FLocat> children, in
        # document order.
        for flocat in file_group.findall(mets_ns + "file/" + mets_ns + "FLocat"):
            fileLocation = flocat.get(href_attr)
            fileFullPath = os.path.join(relativeDirectory, fileLocation)
            dest = os.path.join(licenses_path, os.path.basename(fileLocation))
            rename(fileFullPath, dest)

            src = fileFullPath.replace(transferDirectory, "%transferDirectory%")
            dst = dest.replace(transferDirectory, "%transferDirectory%")
            eventDetail = ""
            eventOutcomeDetailNote = "moved from=\"" + src + "\"; moved to=\"" + dst + "\""
            updateFileLocation(src, dst, "movement", date, eventDetail, transferUUID=transferUUID, eventOutcomeDetailNote=eventOutcomeDetailNote)
    return exitCode
Exemple #2
0
def verifyMetsFileSecChecksums(metsFile, date, taskUUID, transferDirectory, transferUUID, relativeDirectory="./"):
    """Move a DSpace METS file into the transfer's DSpaceMets directory.

    The file is moved (via the ``rename`` helper) to
    ``<transferDirectory>/metadata/submissionDocumentation/DSpaceMets/mets.xml``
    when its parent directory is named ``DSpace_export``; otherwise it goes to
    a ``mets.xml`` inside a sub-directory named after that parent directory.
    The move is then reported through ``updateFileLocation`` as a "movement"
    event.

    :param metsFile: path of the METS file to move.
    :param date: passed through to ``updateFileLocation``.
    :param taskUUID: accepted for interface compatibility; not used here.
    :param transferDirectory: root directory of the transfer; it is replaced
        by the ``%transferDirectory%`` placeholder in the recorded paths.
    :param transferUUID: passed through to ``updateFileLocation``.
    :param relativeDirectory: accepted for interface compatibility; not used.
    :returns: ``exitCode``, which this function never changes from 0.
    """
    print(metsFile)
    DSpaceMets = "metadata/submissionDocumentation/DSpaceMets"
    # Computed outside the try block so that ``path`` is always bound for the
    # code below, whatever happens while creating the directory.
    path = os.path.join(transferDirectory, DSpaceMets)
    try:
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        # Best effort: only filesystem errors are reported and tolerated here.
        print("error creating DSpaceMets directory.")
    exitCode = 0

    metsDirectory = os.path.basename(os.path.dirname(metsFile))

    if metsDirectory == "DSpace_export":
        outputDirectory = path
    else:
        outputDirectory = os.path.join(path, metsDirectory)
        if not os.path.isdir(outputDirectory):
            os.mkdir(outputDirectory)

    dest = os.path.join(outputDirectory, "mets.xml")
    rename(metsFile, dest)

    src = metsFile.replace(transferDirectory, "%transferDirectory%")
    dst = dest.replace(transferDirectory, "%transferDirectory%")
    eventDetail = ""
    eventOutcomeDetailNote = "moved from=\"" + src + "\"; moved to=\"" + dst + "\""
    updateFileLocation(src, dst, "movement", date, eventDetail, transferUUID=transferUUID, eventOutcomeDetailNote=eventOutcomeDetailNote)

    return exitCode
def moveSIP(src, dst, transferUUID, sharedDirectoryPath):
    """Record the new location of ``src`` via ``updateDB``, then move it to ``dst``.

    The location handed to ``updateDB`` is ``dst`` with the first occurrence
    of ``sharedDirectoryPath`` replaced by ``%sharedPath%``. When that value
    ends in ``/`` or ``/.`` the base name of ``src`` is appended, and a
    trailing ``/`` is added when ``src`` is a directory.
    """
    # Drop one trailing slash so os.path.basename() yields the last component.
    source = src[:-1] if src.endswith("/") else src
    source_name = os.path.basename(source)

    recorded_location = dst.replace(sharedDirectoryPath, "%sharedPath%", 1)
    if recorded_location.endswith("/"):
        recorded_location = os.path.join(recorded_location, source_name)
    if recorded_location.endswith("/."):
        recorded_location = os.path.join(recorded_location[:-1], source_name)
    if os.path.isdir(source):
        recorded_location += "/"

    updateDB(recorded_location, transferUUID)
    rename(source, dst)
Exemple #4
0
def something(SIPDirectory, accessDirectory, objectsDirectory, DIPDirectory, SIPUUID, date, copy=False):
    """Move access files into the DIP, named after their matching objects.

    Walks ``accessDirectory``. For each file whose name contains a ``.``, the
    corresponding location under ``objectsDirectory`` (name up to and
    including the last ``.``) is used as a ``currentlocation`` prefix to look
    up ``File`` rows of this SIP that have not been removed. For every row
    whose remaining suffix has no further ``.``, the access file is moved to
    ``<DIPDirectory>/objects/<uuid>-<access file name>`` and the move is
    reported through ``updateFileLocation``.

    :returns: 179 unless some access file had no matching row, in which
        case 1.
    """
    # exitCode = 435
    exitCode = 179
    print(SIPDirectory)
    # Try to pair every access file with a file in the objects directory.
    for walk_root, _subdirs, filenames in os.walk(accessDirectory):
        for filename in filenames:
            access_path = os.path.join(walk_root, filename)
            expected_object_path = access_path.replace(accessDirectory, objectsDirectory, 1)
            base_name = os.path.basename(expected_object_path)
            dot_index = base_name.rfind(".")
            if dot_index == -1:
                # Files without any "." in their name are ignored.
                continue

            # Keep the name up to and including its last ".".
            stem = base_name[:dot_index + 1]
            location_prefix = os.path.join(
                os.path.dirname(expected_object_path), stem
            ).replace(SIPDirectory, "%SIPDirectory%", 1)

            matches = File.objects.filter(removedtime__isnull=True,
                                          currentlocation__startswith=location_prefix,
                                          sip_id=SIPUUID)
            if not matches.exists():
                print("No corresponding object for:", access_path.replace(SIPDirectory, "%SIPDirectory%", 1), file=sys.stderr)
                exitCode = 1

            moved = []
            for object_uuid, object_location in matches.values_list('uuid', 'currentlocation'):
                remainder = object_location.replace(location_prefix, "", 1)
                print(stem[dot_index + 1:], remainder, "\t", end=' ')
                if "." in remainder:
                    # The prefix matched a different, longer file name.
                    continue
                print(stem[dot_index + 1:], remainder, "\t", end=' ')
                dip_path = os.path.join(DIPDirectory, "objects", "%s-%s" % (object_uuid, os.path.basename(access_path)))
                if copy:
                    print("TODO - copy not supported yet")
                    continue
                rename(access_path, dip_path)
                moved.append((
                    access_path.replace(SIPDirectory, "%SIPDirectory%"),
                    dip_path.replace(SIPDirectory, "%SIPDirectory%"),
                ))

            # Report the moves only after every rename for this access file.
            for moved_from, moved_to in moved:
                note = "moved from=\"" + moved_from + "\"; moved to=\"" + moved_to + "\""
                updateFileLocation(moved_from, moved_to, "movement", date, "", sipUUID=SIPUUID, eventOutcomeDetailNote=note)
    return exitCode
def moveSIP(src, dst, sipUUID, sharedDirectoryPath):
    """Record the new location of a SIP via ``updateDB``, then move it to ``dst``.

    The location handed to ``updateDB`` is ``dst`` with the first occurrence
    of ``sharedDirectoryPath`` replaced by ``%sharedPath%``, with the base
    name of ``src`` appended when it ends in ``/`` or ``/.``, and always with
    a trailing ``/``.

    If ``<dst>/<basename of src>`` already exists it is deleted first, with a
    warning on stderr. Directories are removed recursively; regular files and
    symlinks are unlinked, because ``shutil.rmtree`` raises on both.
    """
    # Prepare paths
    if src.endswith("/"):
        src = src[:-1]

    dest = dst.replace(sharedDirectoryPath, "%sharedPath%", 1)
    if dest.endswith("/"):
        dest = os.path.join(dest, os.path.basename(src))
    if dest.endswith("/."):
        dest = os.path.join(dest[:-1], os.path.basename(src))
    updateDB(dest + "/", sipUUID)

    # If destination already exists, delete it with warning
    dest_path = os.path.join(dst, os.path.basename(src))
    if os.path.exists(dest_path):
        print(dest_path, 'exists, deleting', file=sys.stderr)
        if os.path.isdir(dest_path) and not os.path.islink(dest_path):
            shutil.rmtree(dest_path)
        else:
            os.remove(dest_path)

    rename(src, dst)
def verifyMetsFileSecChecksums(
    job,
    metsFile,
    date,
    taskUUID,
    transferDirectory,
    transferUUID,
    relativeDirectory="./",
):
    """Move a DSpace METS file into the transfer's DSpaceMets directory.

    The file is moved (via the ``rename`` helper) to
    ``<transferDirectory>/metadata/submissionDocumentation/DSpaceMets/mets.xml``
    when its parent directory is named ``DSpace_export``; otherwise it goes to
    a ``mets.xml`` inside a sub-directory named after that parent directory.
    A successful move is reported through ``updateFileLocation`` as a
    "movement" event.

    :param job: object whose ``pyprint`` method receives all output.
    :param metsFile: path of the METS file to move.
    :param date: passed through to ``updateFileLocation``.
    :param taskUUID: accepted for interface compatibility; not used here.
    :param transferDirectory: root directory of the transfer; it is replaced
        by the ``%transferDirectory%`` placeholder in the recorded paths.
    :param transferUUID: passed through to ``updateFileLocation``.
    :param relativeDirectory: accepted for interface compatibility; not used.
    :returns: the truthy status returned by ``rename`` when there is one,
        otherwise 0.
    """
    job.pyprint(metsFile)
    DSpaceMets = "metadata/submissionDocumentation/DSpaceMets"
    # Computed outside the try block so that ``path`` is always bound for the
    # code below, whatever happens while creating the directory.
    path = os.path.join(transferDirectory, DSpaceMets)
    try:
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        # Best effort: only filesystem errors are reported and tolerated here.
        job.pyprint("error creating DSpaceMets directory.")
    exitCode = 0

    metsDirectory = os.path.basename(os.path.dirname(metsFile))

    if metsDirectory == "DSpace_export":
        outputDirectory = path
    else:
        outputDirectory = os.path.join(path, metsDirectory)
        if not os.path.isdir(outputDirectory):
            os.mkdir(outputDirectory)

    dest = os.path.join(outputDirectory, "mets.xml")
    rename_status = rename(
        metsFile,
        dest,
        printfn=job.pyprint,
        should_exit=False,
    )
    if rename_status:
        # Propagate the status and skip the event when rename reports one.
        return rename_status

    src = metsFile.replace(transferDirectory, "%transferDirectory%")
    dst = dest.replace(transferDirectory, "%transferDirectory%")
    eventDetail = ""
    eventOutcomeDetailNote = 'moved from="' + src + '"; moved to="' + dst + '"'
    updateFileLocation(
        src,
        dst,
        "movement",
        date,
        eventDetail,
        transferUUID=transferUUID,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
    )

    return exitCode
def moveSIP(job, src, dst, transferUUID, sharedDirectoryPath):
    """Record the new location of ``src`` via ``updateDB``, then move it to ``dst``.

    The location handed to ``updateDB`` is ``dst`` with the first occurrence
    of ``sharedDirectoryPath`` replaced by ``%sharedPath%``. When that value
    ends in ``/`` or ``/.`` the base name of ``src`` is appended, and a
    trailing ``/`` is added when ``src`` is a directory.

    :returns: whatever ``rename`` returns when called with
        ``should_exit=False`` and ``job.pyprint`` as its print function.
    """
    # Drop one trailing slash so os.path.basename() yields the last component.
    source = src[:-1] if src.endswith("/") else src
    source_name = os.path.basename(source)

    recorded_location = dst.replace(sharedDirectoryPath, "%sharedPath%", 1)
    if recorded_location.endswith("/"):
        recorded_location = os.path.join(recorded_location, source_name)
    if recorded_location.endswith("/."):
        recorded_location = os.path.join(recorded_location[:-1], source_name)
    if os.path.isdir(source):
        recorded_location += "/"

    updateDB(recorded_location, transferUUID)
    return rename(source, dst, printfn=job.pyprint, should_exit=False)
def _report_not_extracted(job, file_, reason):
    """Tell the job, on stderr, that ``file_`` is being skipped and why."""
    job.pyprint('Not extracting contents from',
                os.path.basename(file_.currentlocation),
                reason,
                file=sys.stderr)


def main(job, transfer_uuid, sip_directory, date, task_uuid, delete=False):
    """Extract the contents of every extractable package file in a transfer.

    For each non-removed ``File`` of the transfer, the identified format is
    looked up and, when an active FPR command with purpose ``extract`` exists
    for it, the file is temporarily renamed and the command is run so that
    the contents land at the file's original path. Extracted files are then
    registered through ``assign_uuid`` (and ``create_extracted_dir_uuids``
    when the transfer has ``diruuids`` set).

    :param job: job object used for output (``pyprint``, ``write_output``,
        ``write_error``).
    :param transfer_uuid: UUID of the transfer whose files are examined.
    :param sip_directory: replaces ``TRANSFER_DIRECTORY`` in each file's
        ``currentlocation`` to build the on-disk path.
    :param date: passed to ``temporary_directory`` and ``assign_uuid``.
    :param task_uuid: passed to ``assign_uuid``.
    :param delete: when true, the original package is handed to
        ``delete_and_record_package_file`` after a successful extraction.
    :returns: 0 if at least one extraction succeeded, otherwise 255.
    """
    files = File.objects.filter(transfer=transfer_uuid,
                                removedtime__isnull=True)
    if not files:
        job.pyprint('No files found for transfer: ', transfer_uuid)

    transfer_mdl = Transfer.objects.get(uuid=transfer_uuid)

    # We track whether or not anything was extracted because that controls what
    # the next microservice chain link will be.
    # If something was extracted, then a new identification step has to be
    # kicked off on those files; otherwise, we can go ahead with the transfer.
    extracted = False

    for file_ in files:
        # Can't do anything if the file wasn't identified in the previous step
        try:
            format_id = FileFormatVersion.objects.get(file_uuid=file_.uuid)
        except (FileFormatVersion.DoesNotExist,
                FileFormatVersion.MultipleObjectsReturned):
            _report_not_extracted(job, file_, ' - file format not identified')
            continue
        if format_id.format_version is None:
            _report_not_extracted(job, file_, ' - file format not identified')
            continue

        # Extraction commands are defined in the FPR just like normalization
        # commands
        try:
            command = FPCommand.active.get(
                fprule__format=format_id.format_version,
                fprule__purpose='extract',
                fprule__enabled=True,
            )
        except FPCommand.DoesNotExist:
            _report_not_extracted(job, file_, ' - No rule found to extract')
            continue

        # Check if file has already been extracted
        if already_extracted(file_):
            _report_not_extracted(job, file_, ' - extraction already happened.')
            continue

        output_file_path = file_.currentlocation.replace(TRANSFER_DIRECTORY,
                                                         sip_directory)

        # Temporarily rename the input package so that when we extract the
        # contents we don't extract it to a directory that will conflict with
        # the names we want to preserve in our PREMIS:originalLocation.
        temp_dir = temporary_directory(output_file_path, date)
        rename(output_file_path, temp_dir)

        # Create the extract packages command.
        if command.script_type in ('command', 'bashScript'):
            # Shell-style commands get their paths substituted into the text.
            args = []
            command_to_execute = command.command.replace('%inputFile%',
                                                         temp_dir)
            command_to_execute = command_to_execute.replace('%outputDirectory%',
                                                            output_file_path)
        else:
            # Other script types receive the paths as positional arguments.
            command_to_execute = command.command
            args = [temp_dir, output_file_path]

        # Make the command clear to users when inspecting stdin/stdout.
        logger.info("Command to execute is: %s", command_to_execute)
        exitstatus, stdout, stderr = executeOrRun(command.script_type,
                                                  command_to_execute,
                                                  arguments=args,
                                                  printing=True,
                                                  capture_output=True)
        job.write_output(stdout)
        job.write_error(stderr)

        if exitstatus != 0:
            # Dang, looks like the extraction failed
            job.pyprint('Command', command.description, 'failed!',
                        file=sys.stderr)
            continue

        extracted = True
        job.pyprint('Extracted contents from',
                    os.path.basename(output_file_path))

        # Assign UUIDs and insert them into the database, so the newly-
        # extracted files are properly tracked by Archivematica
        for extracted_file in tree(output_file_path):
            assign_uuid(
                job, extracted_file, file_.uuid, transfer_uuid, date,
                task_uuid, sip_directory, file_.currentlocation,
                file_.originallocation, output_file_path)

        if transfer_mdl.diruuids:
            create_extracted_dir_uuids(
                job, transfer_mdl, output_file_path, sip_directory, file_)

        # We may want to remove the original package file after extracting
        # its contents
        if delete:
            delete_and_record_package_file(
                job, temp_dir, file_.uuid, file_.currentlocation)

    return 0 if extracted else 255