コード例 #1
0
def verifyMetsFileSecChecksums(metsFile, date, taskUUID, transferDirectory, transferUUID, relativeDirectory="./"):
    """Relocate the licence files listed in a METS file into the DspaceLicenses directory.

    The METS document is parsed and every ``FLocat`` found under a ``file``
    element of a ``fileSec/fileGrp`` whose ``USE`` attribute is ``"LICENSE"``
    is processed: the ``xlink:href`` value is joined onto
    ``relativeDirectory`` and handed to ``renameAsSudo`` together with the
    destination
    ``<transferDirectory>/metadata/submissionDocumentation/DspaceLicenses/<basename>``.
    The relocation is then recorded through ``updateFileLocation`` as a
    "movement" event, with both paths expressed relative to the
    ``%transferDirectory%`` placeholder.

    Args:
        metsFile: Path of the METS XML file to parse.
        date: Event date passed through to ``updateFileLocation``.
        taskUUID: Unused here; kept for interface compatibility.
        transferDirectory: Root directory of the transfer.
        transferUUID: Transfer identifier passed to ``updateFileLocation``.
        relativeDirectory: Directory the ``href`` values are relative to.

    Returns:
        0 normally; 1 if at least one ``FLocat`` had no ``xlink:href`` and
        was skipped.
    """
    print(metsFile)
    DspaceLicenses = "metadata/submissionDocumentation/DspaceLicenses"
    path = os.path.join(transferDirectory, DspaceLicenses)
    try:
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        # Best effort: report and carry on; a later move will surface the problem.
        print("error creating DspaceLicenses directory.")
    exitCode = 0

    mets = "{http://www.loc.gov/METS/}"
    hrefAttribute = "{http://www.w3.org/1999/xlink}href"
    root = etree.parse(metsFile).getroot()
    for fileGrp in root.findall(mets + "fileSec/" + mets + "fileGrp"):
        if fileGrp.get("USE") != "LICENSE":
            continue
        # Direct ``file`` children and their direct ``FLocat`` children only.
        for fLocat in fileGrp.findall(mets + "file/" + mets + "FLocat"):
            fileLocation = fLocat.get(hrefAttribute)
            if fileLocation is None:
                # Without an href there is nothing to move; joining None
                # onto a path would raise instead of reporting the problem.
                print("error: FLocat element without an xlink:href attribute.")
                exitCode = 1
                continue
            fileFullPath = os.path.join(relativeDirectory, fileLocation)
            dest = os.path.join(transferDirectory, DspaceLicenses, os.path.basename(fileLocation))
            renameAsSudo(fileFullPath, dest)

            src = fileFullPath.replace(transferDirectory, "%transferDirectory%")
            dst = dest.replace(transferDirectory, "%transferDirectory%")
            eventDetail = ""
            eventOutcomeDetailNote = "moved from=\"" + src + "\"; moved to=\"" + dst + "\""
            updateFileLocation(src, dst, "movement", date, eventDetail, transferUUID=transferUUID, eventOutcomeDetailNote=eventOutcomeDetailNote)
    return exitCode
コード例 #2
0
def verifyMetsFileSecChecksums(metsFile, date, taskUUID, transferDirectory, transferUUID, relativeDirectory="./"):
    """Relocate a DSpace METS file into the transfer's DSpaceMets directory.

    The destination is
    ``<transferDirectory>/metadata/submissionDocumentation/DSpaceMets/mets.xml``
    when the METS file's parent directory is named ``DSpace_export``;
    otherwise a sub-directory named after that parent directory is created
    and used. The file is handed to ``renameAsSudo`` and the relocation is
    recorded through ``updateFileLocation`` as a "movement" event, with both
    paths expressed relative to the ``%transferDirectory%`` placeholder.

    Args:
        metsFile: Path of the METS file to relocate.
        date: Event date passed through to ``updateFileLocation``.
        taskUUID: Unused here; kept for interface compatibility.
        transferDirectory: Root directory of the transfer.
        transferUUID: Transfer identifier passed to ``updateFileLocation``.
        relativeDirectory: Unused here; kept for interface compatibility.

    Returns:
        Always 0.
    """
    print(metsFile)
    DSpaceMets = "metadata/submissionDocumentation/DSpaceMets"
    # Computed outside the ``try`` so ``path`` is always bound for the code
    # below, even when creating the directory fails.
    path = os.path.join(transferDirectory, DSpaceMets)
    try:
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        # Best effort: report and carry on.
        print("error creating DSpaceMets directory.")
    exitCode = 0

    metsDirectory = os.path.basename(os.path.dirname(metsFile))

    if metsDirectory == "DSpace_export":
        outputDirectory = path
    else:
        outputDirectory = os.path.join(path, metsDirectory)
        if not os.path.isdir(outputDirectory):
            os.mkdir(outputDirectory)

    dest = os.path.join(outputDirectory, "mets.xml")
    renameAsSudo(metsFile, dest)

    src = metsFile.replace(transferDirectory, "%transferDirectory%")
    dst = dest.replace(transferDirectory, "%transferDirectory%")
    eventDetail = ""
    eventOutcomeDetailNote = "moved from=\"" + src + "\"; moved to=\"" + dst + "\""
    updateFileLocation(src, dst, "movement", date, eventDetail, transferUUID=transferUUID, eventOutcomeDetailNote=eventOutcomeDetailNote)

    return exitCode
コード例 #3
0
def verifyMetsFileSecChecksums(metsFile, date, taskUUID, transferDirectory, transferUUID, relativeDirectory="./"):
    """Move a DSpace METS file under ``metadata/submissionDocumentation/DSpaceMets``.

    When the METS file sits in a directory named ``DSpace_export`` it is
    moved to ``<transferDirectory>/metadata/submissionDocumentation/DSpaceMets/mets.xml``.
    For any other parent directory name, a sub-directory of that name is
    created beneath ``DSpaceMets`` and the file becomes ``mets.xml`` inside
    it. The move itself is delegated to ``renameAsSudo``; afterwards a
    "movement" event is registered with ``updateFileLocation`` using
    ``%transferDirectory%``-relative paths.

    Args:
        metsFile: Path of the METS file to move.
        date: Event date forwarded to ``updateFileLocation``.
        taskUUID: Not used by this function.
        transferDirectory: Root directory of the transfer.
        transferUUID: Transfer identifier forwarded to ``updateFileLocation``.
        relativeDirectory: Not used by this function.

    Returns:
        Always 0.
    """
    print(metsFile)
    DSpaceMets = "metadata/submissionDocumentation/DSpaceMets"
    # ``path`` is needed after the try block, so bind it before entering it.
    path = os.path.join(transferDirectory, DSpaceMets)
    try:
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        # Directory creation is best effort; only filesystem errors are
        # tolerated here so that interrupts and programming errors surface.
        print("error creating DSpaceMets directory.")
    exitCode = 0

    metsDirectory = os.path.basename(os.path.dirname(metsFile))

    if metsDirectory == "DSpace_export":
        outputDirectory = path
    else:
        outputDirectory = os.path.join(path, metsDirectory)
        if not os.path.isdir(outputDirectory):
            os.mkdir(outputDirectory)

    dest = os.path.join(outputDirectory, "mets.xml")
    renameAsSudo(metsFile, dest)

    src = metsFile.replace(transferDirectory, "%transferDirectory%")
    dst = dest.replace(transferDirectory, "%transferDirectory%")
    eventDetail = ""
    eventOutcomeDetailNote = "moved from=\"" + src + "\"; moved to=\"" + dst + "\""
    updateFileLocation(src, dst, "movement", date, eventDetail, transferUUID=transferUUID, eventOutcomeDetailNote=eventOutcomeDetailNote)

    return exitCode
コード例 #4
0
def something(SIPDirectory, accessDirectory, objectsDirectory, DIPDirectory, SIPUUID, date, copy=False):
    #exitCode = 435
    exitCode = 179
    print SIPDirectory
    #For every file, & directory Try to find the matching file & directory in the objects directory
    for (path, dirs, files) in os.walk(accessDirectory):
        for file in files:
            accessPath = os.path.join(path, file)
            objectPath = accessPath.replace(accessDirectory, objectsDirectory, 1)
            objectName = os.path.basename(objectPath)
            objectNameExtensionIndex = objectName.rfind(".")

            if objectNameExtensionIndex != -1:
                objectName = objectName[:objectNameExtensionIndex + 1]
                objectNameLike = os.path.join( os.path.dirname(objectPath), objectName).replace(SIPDirectory, "%SIPDirectory%", 1)
                #sql = "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation LIKE  '%s%' AND removedTime = 0 AND SIPUUID = '%s'" % (objectNameLike, SIPUUID)
                #ValueError: unsupported format character ''' (0x27) at index 76
                sql = "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation LIKE  '" + objectNameLike + "%' AND removedTime = 0 AND SIPUUID = '"+ SIPUUID + "'"
                c, sqlLock = databaseInterface.querySQL(sql)
                row = c.fetchone()
                if not row:
                    print >>sys.stderr, "No corresponding object for:", accessPath.replace(SIPDirectory, "%SIPDirectory%", 1)
                    exitCode = 1
                update = []
                while row != None:
                    objectUUID = row[0]
                    objectPath = row[1]
                    objectExtension = objectPath.replace(objectNameLike, "", 1)
                    print objectName[objectNameExtensionIndex + 1:], objectExtension, "\t",
                    if objectExtension.find(".") != -1:
                        print
                        row = c.fetchone()
                        continue
                    print objectName[objectNameExtensionIndex + 1:], objectExtension, "\t",
                    print row  
                    dipPath = os.path.join(DIPDirectory,  "objects", "%s-%s" % (objectUUID, os.path.basename(accessPath)))
                    if copy:
                        print "TODO - copy not supported yet"
                    else:
                        #
                        dest = dipPath
                        renameAsSudo(accessPath, dest)

                        src = accessPath.replace(SIPDirectory, "%SIPDirectory%")
                        dst = dest.replace(SIPDirectory, "%SIPDirectory%")
                        update.append((src, dst))

                        #
                    row = c.fetchone()
                sqlLock.release()
                for src, dst in update:
                    eventDetail = ""
                    eventOutcomeDetailNote = "moved from=\"" + src + "\"; moved to=\"" + dst + "\""
                    updateFileLocation(src, dst, "movement", date, eventDetail, sipUUID=SIPUUID, eventOutcomeDetailNote = eventOutcomeDetailNote)
    return exitCode
def verifyMetsFileSecChecksums(
    job,
    metsFile,
    date,
    taskUUID,
    transferDirectory,
    transferUUID,
    relativeDirectory="./",
):
    """Relocate a DSpace METS file into the transfer's DSpaceMets directory.

    The destination is
    ``<transferDirectory>/metadata/submissionDocumentation/DSpaceMets/mets.xml``
    when the METS file's parent directory is named ``DSpace_export``;
    otherwise a sub-directory named after that parent directory is created
    and used. The move is delegated to ``rename``; if it reports a truthy
    status, that status is returned immediately and no event is recorded.
    Otherwise a "movement" event is registered through
    ``updateFileLocation`` with ``%transferDirectory%``-relative paths.

    Args:
        job: Object whose ``pyprint`` method is used for all output.
        metsFile: Path of the METS file to relocate.
        date: Event date passed through to ``updateFileLocation``.
        taskUUID: Unused here; kept for interface compatibility.
        transferDirectory: Root directory of the transfer.
        transferUUID: Transfer identifier passed to ``updateFileLocation``.
        relativeDirectory: Unused here; kept for interface compatibility.

    Returns:
        The truthy status from ``rename`` if it reported one, otherwise 0.
    """
    job.pyprint(metsFile)
    DSpaceMets = "metadata/submissionDocumentation/DSpaceMets"
    # Computed outside the ``try`` so ``path`` is always bound for the code
    # below, even when creating the directory fails.
    path = os.path.join(transferDirectory, DSpaceMets)
    try:
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        # Best effort: report and carry on.
        job.pyprint("error creating DSpaceMets directory.")
    exitCode = 0

    metsDirectory = os.path.basename(os.path.dirname(metsFile))

    if metsDirectory == "DSpace_export":
        outputDirectory = path
    else:
        outputDirectory = os.path.join(path, metsDirectory)
        if not os.path.isdir(outputDirectory):
            os.mkdir(outputDirectory)

    dest = os.path.join(outputDirectory, "mets.xml")
    rename_status = rename(metsFile,
                           dest,
                           printfn=job.pyprint,
                           should_exit=False)
    if rename_status:
        return rename_status

    src = metsFile.replace(transferDirectory, "%transferDirectory%")
    dst = dest.replace(transferDirectory, "%transferDirectory%")
    eventDetail = ""
    eventOutcomeDetailNote = 'moved from="' + src + '"; moved to="' + dst + '"'
    updateFileLocation(
        src,
        dst,
        "movement",
        date,
        eventDetail,
        transferUUID=transferUUID,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
    )

    return exitCode
コード例 #6
0
def something(SIPDirectory, accessDirectory, objectsDirectory, DIPDirectory, SIPUUID, date, copy=False):
    """Move access files into the DIP, prefixing each with its matching object's UUID.

    Walks ``accessDirectory``. Each file that has an extension is mapped
    onto ``objectsDirectory``, its extension is stripped (trailing dot
    kept) and ``File`` rows of this SIP whose ``currentlocation`` starts
    with that prefix are looked up. Rows whose remaining suffix still
    contains a dot are skipped; for the others the access file is passed
    to ``rename`` with destination
    ``<DIPDirectory>/objects/<uuid>-<access file name>``, and the move is
    afterwards recorded through ``updateFileLocation``.

    Returns 1 if at least one access file had no matching row, else 179.
    """
    exitCode = 179
    print(SIPDirectory)
    for walk_root, _subdirs, filenames in os.walk(accessDirectory):
        for filename in filenames:
            accessPath = os.path.join(walk_root, filename)
            mirrored = accessPath.replace(accessDirectory, objectsDirectory, 1)
            base = os.path.basename(mirrored)
            dot = base.rfind(".")
            if dot == -1:
                # No extension: nothing to match against.
                continue

            stem = base[:dot + 1]
            prefix = os.path.join(os.path.dirname(mirrored), stem).replace(SIPDirectory, "%SIPDirectory%", 1)

            matches = File.objects.filter(
                removedtime__isnull=True,
                currentlocation__startswith=prefix,
                sip_id=SIPUUID,
            )
            if not matches.exists():
                print("No corresponding object for:", accessPath.replace(SIPDirectory, "%SIPDirectory%", 1), file=sys.stderr)
                exitCode = 1

            moved = []
            for objectUUID, location in matches.values_list('uuid', 'currentlocation'):
                suffix = location.replace(prefix, "", 1)
                print(stem[dot + 1:], suffix, "\t", end=' ')
                if "." in suffix:
                    # Remaining name still has a dot: a different, longer name.
                    continue
                print(stem[dot + 1:], suffix, "\t", end=' ')
                dipPath = os.path.join(DIPDirectory, "objects", "%s-%s" % (objectUUID, os.path.basename(accessPath)))
                if copy:
                    print("TODO - copy not supported yet")
                    continue
                rename(accessPath, dipPath)
                moved.append((
                    accessPath.replace(SIPDirectory, "%SIPDirectory%"),
                    dipPath.replace(SIPDirectory, "%SIPDirectory%"),
                ))

            # Record every completed move as a "movement" event.
            for src, dst in moved:
                note = "moved from=\"" + src + "\"; moved to=\"" + dst + "\""
                updateFileLocation(src, dst, "movement", date, "", sipUUID=SIPUUID, eventOutcomeDetailNote=note)
    return exitCode
コード例 #7
0
        sipPath, groupType, 1)  #"%SIPDirectory%objects/"

    sanitizations = sanitizeNames.sanitizeRecursively(objectsDirectory)

    eventDetail = "program=\"sanitizeNames\"; version=\"" + sanitizeNames.VERSION + "\""
    for oldfile, newfile in sanitizations:
        if os.path.isfile(newfile):
            oldfile = oldfile.replace(objectsDirectory, relativeReplacement, 1)
            newfile = newfile.replace(objectsDirectory, relativeReplacement, 1)
            print oldfile, " -> ", newfile

            if groupType == "%SIPDirectory%":
                updateFileLocation(oldfile,
                                   newfile,
                                   "name cleanup",
                                   date,
                                   "prohibited characters removed:" +
                                   eventDetail,
                                   fileUUID=None,
                                   sipUUID=sipUUID)
            elif groupType == "%transferDirectory%":
                updateFileLocation(oldfile,
                                   newfile,
                                   "name cleanup",
                                   date,
                                   "prohibited characters removed:" +
                                   eventDetail,
                                   fileUUID=None,
                                   transferUUID=sipUUID)
            else:
                print >> sys.stderr, "bad group type", groupType
                exit(3)
コード例 #8
0
def sanitize_object_names(job, objectsDirectory, sipUUID, date, groupType, groupSQL, sipPath):
    """Sanitize names on disk and bring the stored locations in line.

    ``sanitize_names.sanitizeRecursively`` is run over ``objectsDirectory``
    and its result is used as an old-path -> new-path mapping. Every
    ``File`` row of the unit that has no removal time is then checked, plus
    the unit's ``Directory`` rows when ``groupSQL`` is ``'transfer_id'`` and
    the transfer has ``diruuids`` set. If the row's location, or any parent
    directory of it, appears in the mapping, the location is rewritten:
    files through ``updateFileLocation`` (a "name cleanup" event),
    directories by saving the model directly.

    Returns 0 on completion, or 3 as soon as a changed location is
    encountered while ``groupType`` is neither ``%SIPDirectory%`` nor
    ``%transferDirectory%``.
    """
    placeholder_root = objectsDirectory.replace(sipPath, groupType, 1)  # "%SIPDirectory%objects/"

    # ``Directory`` rows exist only for transfers that track directory UUIDs.
    tracked_dirs = []
    if groupSQL == 'transfer_id':
        transfer = Transfer.objects.get(uuid=sipUUID)
        if transfer.diruuids:
            tracked_dirs = Directory.objects.filter(transfer=transfer).all()

    # Rename on disk first; the mapping drives the DB updates below.
    renames = sanitize_names.sanitizeRecursively(job, objectsDirectory)
    for before, after in renames.items():
        logger.info('sanitizations: %s -> %s', before, after)

    tool_detail = 'program="sanitize_names"; version="' + sanitize_names.VERSION + '"'
    event_detail = "prohibited characters removed:" + tool_detail

    tracked_files = File.objects.filter(**{
        groupSQL: sipUUID,
        "removedtime__isnull": True,
    })

    for model in chain(tracked_files, tracked_dirs):
        on_disk = unicodeToStr(
            unicodedata.normalize('NFC', model.currentlocation)
        ).replace(groupType, sipPath)
        cleaned = unicodeToStr(on_disk)
        logger.info('Checking %s', on_disk)

        # Mapping keys mix sanitized and unsanitized components
        # (sanitized/sanitized/unsanitized). Walk upwards from the full path;
        # after each hit restart from the rewritten path so the next
        # unsanitized component can be matched.
        probe = cleaned
        while objectsDirectory in probe:  # Stay within unit
            if probe not in renames:
                probe = os.path.dirname(probe)
                continue
            cleaned = cleaned.replace(probe, renames[probe])
            probe = cleaned

        if on_disk == cleaned:
            logger.info('No sanitization for %s', on_disk)
            job.pyprint('No sanitization found for', on_disk)
            continue

        old_location = on_disk.replace(objectsDirectory, placeholder_root, 1)
        new_location = cleaned.replace(objectsDirectory, placeholder_root, 1)

        if groupType == "%SIPDirectory%":
            owner = {'sipUUID': sipUUID}
        elif groupType == "%transferDirectory%":
            owner = {'transferUUID': sipUUID}
        else:
            job.pyprint("bad group type", groupType, file=sys.stderr)
            return 3

        logger.info('Sanitized name: %s -> %s', old_location, new_location)
        job.pyprint('Sanitized name:', old_location, " -> ", new_location)

        if isinstance(model, File):
            updateFileLocation(
                src=old_location,
                dst=new_location,
                eventType='name cleanup',
                eventDateTime=date,
                eventDetail=event_detail,
                fileUUID=None,
                **owner
            )
        else:
            model.currentlocation = new_location
            model.save()

    return 0
コード例 #9
0
def something(SIPDirectory,
              accessDirectory,
              objectsDirectory,
              DIPDirectory,
              SIPUUID,
              date,
              copy=False):
    #exitCode = 435
    exitCode = 179
    print SIPDirectory
    #For every file, & directory Try to find the matching file & directory in the objects directory
    for (path, dirs, files) in os.walk(accessDirectory):
        for file in files:
            accessPath = os.path.join(path, file)
            objectPath = accessPath.replace(accessDirectory, objectsDirectory,
                                            1)
            objectName = os.path.basename(objectPath)
            objectNameExtensionIndex = objectName.rfind(".")

            if objectNameExtensionIndex != -1:
                objectName = objectName[:objectNameExtensionIndex + 1]
                objectNameLike = os.path.join(os.path.dirname(objectPath),
                                              objectName).replace(
                                                  SIPDirectory,
                                                  "%SIPDirectory%", 1)
                #sql = "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation LIKE  '%s%' AND removedTime = 0 AND SIPUUID = '%s'" % (objectNameLike, SIPUUID)
                #ValueError: unsupported format character ''' (0x27) at index 76
                sql = "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation LIKE  '" + objectNameLike + "%' AND removedTime = 0 AND SIPUUID = '" + SIPUUID + "'"
                c, sqlLock = databaseInterface.querySQL(sql)
                row = c.fetchone()
                if not row:
                    print >> sys.stderr, "No corresponding object for:", accessPath.replace(
                        SIPDirectory, "%SIPDirectory%", 1)
                    exitCode = 1
                update = []
                while row != None:
                    objectUUID = row[0]
                    objectPath = row[1]
                    objectExtension = objectPath.replace(objectNameLike, "", 1)
                    print objectName[objectNameExtensionIndex +
                                     1:], objectExtension, "\t",
                    if objectExtension.find(".") != -1:
                        print
                        row = c.fetchone()
                        continue
                    print objectName[objectNameExtensionIndex +
                                     1:], objectExtension, "\t",
                    print row
                    dipPath = os.path.join(
                        DIPDirectory, "objects",
                        "%s-%s" % (objectUUID, os.path.basename(accessPath)))
                    if copy:
                        print "TODO - copy not supported yet"
                    else:
                        #
                        dest = dipPath
                        renameAsSudo(accessPath, dest)

                        src = accessPath.replace(SIPDirectory,
                                                 "%SIPDirectory%")
                        dst = dest.replace(SIPDirectory, "%SIPDirectory%")
                        update.append((src, dst))

                        #
                    row = c.fetchone()
                sqlLock.release()
                for src, dst in update:
                    eventDetail = ""
                    eventOutcomeDetailNote = "moved from=\"" + src + "\"; moved to=\"" + dst + "\""
                    updateFileLocation(
                        src,
                        dst,
                        "movement",
                        date,
                        eventDetail,
                        sipUUID=SIPUUID,
                        eventOutcomeDetailNote=eventOutcomeDetailNote)
    return exitCode
コード例 #10
0
        quit(2)

    eventDetail= "program=\"sanitizeNames\"; version=\"" + version + "\""
    for line in lines:
        detoxfiles = line.split(" -> ")
        if len(detoxfiles) > 1 :
            oldfile = detoxfiles[0].split('\n',1)[0]
            newfile = detoxfiles[1]
            #print "line: ", line
            if os.path.isfile(newfile):
                oldfile = oldfile.replace(objectsDirectory, relativeReplacement, 1)
                newfile = newfile.replace(objectsDirectory, relativeReplacement, 1)
                print oldfile, " -> ", newfile

                if groupType == "%SIPDirectory%":
                    updateFileLocation(oldfile, newfile, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=None, sipUUID=sipUUID)
                elif groupType == "%transferDirectory%":
                    updateFileLocation(oldfile, newfile, "name cleanup", date, "prohibited characters removed:" + eventDetail, fileUUID=None, transferUUID=sipUUID)
                else:
                    print >>sys.stderr, "bad group type", groupType
                    exit(3)

            elif os.path.isdir(newfile):
                oldfile = oldfile.replace(objectsDirectory, relativeReplacement, 1) + "/"
                newfile = newfile.replace(objectsDirectory, relativeReplacement, 1) + "/"
                directoryContents = []

                sql = "SELECT fileUUID, currentLocation FROM Files WHERE Files.removedTime = 0 AND Files.currentLocation LIKE '" + MySQLdb.escape_string(oldfile.replace("\\", "\\\\")).replace("%","\%") + "%' AND " + groupSQL + " = '" + groupID + "';"

                c, sqlLock = databaseInterface.querySQL(sql)
                row = c.fetchone()
コード例 #11
0
        if len(detoxfiles) > 1:
            oldfile = detoxfiles[0].split('\n', 1)[0]
            newfile = detoxfiles[1]
            #print "line: ", line
            if os.path.isfile(newfile):
                oldfile = oldfile.replace(objectsDirectory,
                                          relativeReplacement, 1)
                newfile = newfile.replace(objectsDirectory,
                                          relativeReplacement, 1)
                print oldfile, " -> ", newfile

                if groupType == "%SIPDirectory%":
                    updateFileLocation(oldfile,
                                       newfile,
                                       "name cleanup",
                                       date,
                                       "prohibited characters removed:" +
                                       eventDetail,
                                       fileUUID=None,
                                       sipUUID=sipUUID)
                elif groupType == "%transferDirectory%":
                    updateFileLocation(oldfile,
                                       newfile,
                                       "name cleanup",
                                       date,
                                       "prohibited characters removed:" +
                                       eventDetail,
                                       fileUUID=None,
                                       transferUUID=sipUUID)
                else:
                    print >> sys.stderr, "bad group type", groupType
                    exit(3)
コード例 #12
0
def sanitize_object_names(objectsDirectory, sipUUID, date, groupType, groupSQL,
                          sipPath):
    """Sanitize object names in a Transfer/SIP.

    Runs ``sanitizeNames.sanitizeRecursively`` over ``objectsDirectory`` and
    uses its result as an old-path -> new-path mapping. Every ``File`` row
    selected by ``groupSQL == sipUUID`` that has no removal time is then
    checked: if its location, or any parent directory of it, appears in the
    mapping, the rewritten location is recorded through
    ``updateFileLocation`` as a "name cleanup" event.

    ``groupType`` is the placeholder (``%SIPDirectory%`` or
    ``%transferDirectory%``) that stands in for ``sipPath`` in stored
    locations. Any other value ends the process with ``sys.exit(3)`` as soon
    as a changed location is encountered.
    """
    # Placeholder-relative form of the objects directory, used to turn
    # absolute paths back into stored locations.
    relativeReplacement = objectsDirectory.replace(
        sipPath, groupType, 1)  # "%SIPDirectory%objects/"

    # Sanitize objects on disk
    sanitizations = sanitizeNames.sanitizeRecursively(objectsDirectory)
    for oldfile, newfile in sanitizations.items():
        logger.info('sanitizations: %s -> %s', oldfile, newfile)

    eventDetail = 'program="sanitizeNames"; version="' + sanitizeNames.VERSION + '"'

    # Update files in DB
    # The filter field name is supplied by the caller through ``groupSQL``.
    kwargs = {
        groupSQL: sipUUID,
        "removedtime__isnull": True,
    }
    for f in File.objects.filter(**kwargs):
        # Check all files to see if any parent directory had a sanitization event
        # The stored location is NFC-normalized and its placeholder is
        # expanded to ``sipPath`` so it can be compared with the mapping keys.
        current_location = unicodeToStr(
            unicodedata.normalize('NFC', f.currentlocation)).replace(
                groupType, sipPath)
        sanitized_location = unicodeToStr(current_location)
        logger.info('Checking %s', current_location)

        # Check parent directories
        # Since directory keys are a mix of sanitized and unsanitized, this is a little complicated
        # Directories keys are in the form sanitized/sanitized/unsanitized
        # When a match is found (eg 'unsanitized' -> 'sanitized') reset the search
        # This will find 'sanitized/unsanitized2' -> 'sanitized/sanitized2' on the next pass
        # TODO This should be checked for a more efficient solution
        dirpath = sanitized_location
        while objectsDirectory in dirpath:  # Stay within unit
            if dirpath in sanitizations:  # Make replacement
                sanitized_location = sanitized_location.replace(
                    dirpath, sanitizations[dirpath])
                dirpath = sanitized_location  # Reset search
            else:  # Check next level up
                dirpath = os.path.dirname(dirpath)

        if current_location != sanitized_location:
            # Convert both absolute paths back to placeholder-relative form.
            oldfile = current_location.replace(objectsDirectory,
                                               relativeReplacement, 1)
            newfile = sanitized_location.replace(objectsDirectory,
                                                 relativeReplacement, 1)
            # NOTE: ``kwargs`` is rebound here; the query above has already
            # been constructed from the earlier dict.
            kwargs = {
                'src': oldfile,
                'dst': newfile,
                'eventType': 'name cleanup',
                'eventDateTime': date,
                'eventDetail': "prohibited characters removed:" + eventDetail,
                'fileUUID': None,
            }
            # ``sipUUID`` carries either a SIP or a transfer identifier;
            # ``groupType`` decides which keyword it is passed under.
            if groupType == "%SIPDirectory%":
                kwargs['sipUUID'] = sipUUID
            elif groupType == "%transferDirectory%":
                kwargs['transferUUID'] = sipUUID
            else:
                print("bad group type", groupType, file=sys.stderr)
                sys.exit(3)
            logger.info('Sanitized name: %s -> %s', oldfile, newfile)
            print('Sanitized name:', oldfile, " -> ", newfile)
            updateFileLocation(**kwargs)
        else:
            logger.info('No sanitization for %s', current_location)
            print('No sanitization found for', current_location)