Beispiel #1
0
def _fetchAllRows(sql):
    """Run ``sql`` through ``databaseInterface.querySQL`` and return every row.

    ``querySQL`` returns a cursor together with a lock that the caller has to
    release.  The release is done in a ``finally`` clause so the lock is not
    left held when fetching from the cursor raises.

    :param sql: complete SQL statement to execute
    :return: list of the rows produced by the cursor (possibly empty)
    """
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        rows = []
        row = c.fetchone()
        while row is not None:
            rows.append(row)
            row = c.fetchone()
        return rows
    finally:
        sqlLock.release()


def createFileSec(directoryPath, structMapDiv):
    """Add structMap and fileSec entries for ``directoryPath``, recursively.

    Every regular file found in ``directoryPath`` is looked up in the Files
    table by its current location.  The files are then processed ordered by
    the base name of their original location (for a derived file: the original
    location of the file it was derived from).  Each file gets an "Item" div
    below ``structMapDiv`` and, when its use is a key of ``globalFileGrps``,
    a ``file`` element in that file group.  Sub-directories are handled last:
    each gets a "Directory" div and is processed by a recursive call.

    Files without a database row, without a group id, or with a use that is
    not in ``globalFileGrps`` are reported on stderr and counted in
    ``sharedVariablesAcrossModules.globalErrorCount``.

    :param directoryPath: directory on disk to describe
    :param structMapDiv: structMap element the created divs are appended to
    """
    global fileNameToFileID
    global trimStructMap
    global trimStructMapObjects
    global globalDmdSecCounter
    global globalAmdSecCounter
    global globalDigiprovMDCounter
    global dmdSecs
    global amdSecs

    delayed = []
    filesInThisDirectory = []
    dspaceMetsDMDID = None
    directoryContentsTuples = []
    for item in os.listdir(directoryPath):
        itemdirectoryPath = os.path.join(directoryPath, item)
        if os.path.isdir(itemdirectoryPath):
            # Sub-directories are processed after the files of this directory.
            delayed.append(item)

        elif os.path.isfile(itemdirectoryPath):
            # find original file name
            directoryPathSTR = itemdirectoryPath.replace(
                baseDirectoryPath, baseDirectoryPathString, 1)
            sql = """SELECT Related.originalLocation AS 'derivedFromOriginalLocation', 
                            Current.originalLocation
                        FROM Files AS Current 
                        LEFT OUTER JOIN Derivations ON Current.fileUUID = Derivations.derivedFileUUID 
                        LEFT OUTER JOIN Files AS Related ON Derivations.sourceFileUUID = Related.fileUUID
                        WHERE Current.removedTime = 0 AND Current.%s = '%s' 
                            AND Current.currentLocation = '%s';""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(directoryPathSTR))
            rows = _fetchAllRows(sql)
            if not rows:
                print >> sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
                sharedVariablesAcrossModules.globalErrorCount += 1
                continue
            for row in rows:
                # add to files in this directory tuple list
                derivedFromOriginalLocation = row[0]
                originalLocation = row[1]
                if derivedFromOriginalLocation is not None:
                    originalLocation = derivedFromOriginalLocation
                # + u"/" keeps normalized after original; "/" is very
                # uncommon in a file name
                originalName = os.path.basename(originalLocation) + u"/"
                directoryContentsTuples.append((originalName, item))

    # order files by their original name
    orderedFiles = sorted(
        directoryContentsTuples,
        key=lambda listItems: listItems[0],
        cmp=sharedVariablesAcrossModules.collator.compare)
    for originalName, item in orderedFiles:
        itemdirectoryPath = os.path.join(directoryPath, item)
        directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath,
                                                     baseDirectoryPathString,
                                                     1)

        sql = """SELECT fileUUID, fileGrpUse, fileGrpUUID, Files.transferUUID, label, originalLocation, Transfers.type 
                FROM Files
                LEFT OUTER JOIN Transfers ON Files.transferUUID = Transfers.transferUUID
                WHERE removedTime = 0 AND %s = '%s' AND Files.currentLocation = '%s';""" % (
            fileGroupType, fileGroupIdentifier,
            MySQLdb.escape_string(directoryPathSTR))
        rows = _fetchAllRows(sql)
        if not rows:
            print >> sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
            sharedVariablesAcrossModules.globalErrorCount += 1
            continue
        # When several rows match, the values of the last row are used.
        lastRow = rows[-1]
        myuuid = lastRow[0]
        use = lastRow[1]
        fileGrpUUID = lastRow[2]
        transferUUID = lastRow[3]
        label = lastRow[4]
        originalLocation = lastRow[5]
        typeOfTransfer = lastRow[6]

        # From here on the path is relative to the base directory.
        directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, "", 1)

        if typeOfTransfer == "TRIM" and trimStructMap is None:
            # First TRIM file seen: create the logical structMap together
            # with its dmdSec and amdSec.
            trimStructMap = etree.Element("structMap",
                                          attrib={
                                              "TYPE": "logical",
                                              "ID": "structMap_2",
                                              "LABEL":
                                              "Hierarchical arrangement"
                                          })
            trimStructMapObjects = etree.SubElement(trimStructMap,
                                                    "div",
                                                    attrib={
                                                        "TYPE": "File",
                                                        "LABEL": "objects"
                                                    })

            trimDmdSec = getTrimDmdSec(baseDirectoryPath, fileGroupIdentifier)
            globalDmdSecCounter += 1
            dmdSecs.append(trimDmdSec)
            ID = "dmdSec_" + str(globalDmdSecCounter)
            trimDmdSec.set("ID", ID)
            trimStructMapObjects.set("DMDID", ID)

            trimAmdSec = etree.Element("amdSec")
            globalAmdSecCounter += 1
            amdSecs.append(trimAmdSec)
            ID = "amdSec_" + str(globalAmdSecCounter)
            trimAmdSec.set("ID", ID)

            digiprovMD = getTrimAmdSec(baseDirectoryPath, fileGroupIdentifier)
            globalDigiprovMDCounter += 1
            digiprovMD.set("ID", "digiprovMD_" + str(globalDigiprovMDCounter))
            trimAmdSec.append(digiprovMD)

            trimStructMapObjects.set("ADMID", ID)

        FILEID = "%s-%s" % (item, myuuid)
        if FILEID[0].isdigit():
            # Prefix ids that would otherwise start with a digit.
            FILEID = "_" + FILEID

        # <fptr FILEID="file1-UUID"/>
        fileDiv = etree.SubElement(structMapDiv, "div")
        if label is not None:
            fileDiv.set("LABEL", label)
        fileDiv.set("TYPE", "Item")
        newChild(fileDiv, "fptr", sets=[("FILEID", FILEID)])
        fileNameToFileID[item] = FILEID

        GROUPID = ""
        if fileGrpUUID:
            GROUPID = "Group-%s" % (fileGrpUUID)
            if use == "TRIM file metadata":
                use = "metadata"

        elif use in ("original", "submissionDocumentation", "metadata",
                     "maildirFile"):
            GROUPID = "Group-%s" % (myuuid)
            if use == "maildirFile":
                use = "original"
            if use == "original":
                DMDIDS = createDMDIDSFromCSVParsedMetadataFiles(
                    originalLocation.replace('%transferDirectory%', "", 1))
                if DMDIDS:
                    fileDiv.set("DMDID", DMDIDS)
                if typeOfTransfer == "TRIM":
                    trimFileDiv = etree.SubElement(trimStructMapObjects,
                                                   "div",
                                                   attrib={"TYPE": "Item"})

                    trimFileDmdSec = getTrimFileDmdSec(baseDirectoryPath,
                                                       fileGroupIdentifier,
                                                       myuuid)
                    globalDmdSecCounter += 1
                    dmdSecs.append(trimFileDmdSec)
                    ID = "dmdSec_" + str(globalDmdSecCounter)
                    trimFileDmdSec.set("ID", ID)

                    trimFileDiv.set("DMDID", ID)

                    etree.SubElement(trimFileDiv,
                                     "fptr",
                                     attrib={"FILEID": FILEID})

        elif use == "preservation":
            # Group with the file this one was derived from.
            sql = "SELECT * FROM Derivations WHERE derivedFileUUID = '" + myuuid + "';"
            for row in _fetchAllRows(sql):
                GROUPID = "Group-%s" % (row[1])

        elif use in ("license", "text/ocr", "DSPACEMETS"):
            # Group with an original file whose original location lies in
            # the same directory.
            sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse = 'original' AND originalLocation LIKE '%s/%%'""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(
                    os.path.dirname(originalLocation)).replace("%", "\\%"))
            for row in _fetchAllRows(sql):
                GROUPID = "Group-%s" % (row[0])

        elif use == "service":
            # Group with the original whose current location starts with the
            # same path up to and including the last ".".
            fileFileIDPath = itemdirectoryPath.replace(
                baseDirectoryPath + "objects/service/",
                baseDirectoryPathString + "objects/")
            objectNameExtensionIndex = fileFileIDPath.rfind(".")
            fileFileIDPath = fileFileIDPath[:objectNameExtensionIndex + 1]
            sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse = 'original' AND currentLocation LIKE '%s%%'""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(fileFileIDPath.replace("%", "\\%")))
            for row in _fetchAllRows(sql):
                GROUPID = "Group-%s" % (row[0])

        elif use == "TRIM container metadata":
            GROUPID = "Group-%s" % (myuuid)
            use = "metadata"

        if transferUUID:
            sql = "SELECT type FROM Transfers WHERE transferUUID = '%s';" % (
                transferUUID)
            rows = databaseInterface.queryAllSQL(sql)
            # An empty result is skipped instead of failing with IndexError.
            if rows and rows[0][0] == "Dspace" and use == "DSPACEMETS":
                use = "submissionDocumentation"
                admidApplyTo = None
                if GROUPID == "":  # is an AIP identifier
                    GROUPID = myuuid
                    admidApplyTo = structMapDiv.getparent()

                LABEL = "mets.xml-%s" % (GROUPID)
                dmdSec, ID = createMDRefDMDSec(LABEL, itemdirectoryPath,
                                               directoryPathSTR)
                dmdSecs.append(dmdSec)
                if admidApplyTo is not None:
                    admidApplyTo.set("DMDID", ID)
                else:
                    # Applied to the original files of this directory once
                    # all files have been processed.
                    dspaceMetsDMDID = ID

        if GROUPID == "":
            sharedVariablesAcrossModules.globalErrorCount += 1
            print >> sys.stderr, "No groupID for file: \"", directoryPathSTR, "\""

        if use not in globalFileGrps:
            print >> sys.stderr, "Invalid use: \"%s\"" % (use)
            sharedVariablesAcrossModules.globalErrorCount += 1
        else:
            fileElement = newChild(globalFileGrps[use],
                                   "file",
                                   sets=[("ID", FILEID), ("GROUPID", GROUPID)])
            if use == "original":
                filesInThisDirectory.append(fileElement)
            # <Flocat xlink:href="objects/file1-UUID" locType="other" otherLocType="system"/>
            newChild(fileElement,
                     "FLocat",
                     sets=[(xlinkBNS + "href", directoryPathSTR),
                           ("LOCTYPE", "OTHER"),
                           ("OTHERLOCTYPE", "SYSTEM")])
            if includeAmdSec:
                AMD, ADMID = getAMDSec(myuuid, directoryPathSTR, use,
                                       fileGroupType, fileGroupIdentifier,
                                       transferUUID, itemdirectoryPath,
                                       typeOfTransfer)
                amdSecs.append(AMD)
                fileElement.set("ADMID", ADMID)

    if dspaceMetsDMDID is not None:
        for fileElement in filesInThisDirectory:
            fileElement.set("DMDID", dspaceMetsDMDID)

    for item in sorted(delayed,
                       cmp=sharedVariablesAcrossModules.collator.compare):
        itemdirectoryPath = os.path.join(directoryPath, item)
        directoryDiv = newChild(structMapDiv,
                                "div",
                                sets=[("TYPE", "Directory"), ("LABEL", item)])
        DMDIDS = createDMDIDSFromCSVParsedMetadataDirectories(
            itemdirectoryPath.replace(baseDirectoryPath, "", 1))
        if DMDIDS:
            directoryDiv.set("DMDID", DMDIDS)
        createFileSec(itemdirectoryPath, directoryDiv)
def createFileSec(directoryPath, structMapDiv):
    """Create structMap divs and fileSec entries for a directory tree.

    Regular files of ``directoryPath`` are resolved against the Files table
    through their current location and handled sorted by the base name of
    their original location (a derived file uses the original location of its
    source file).  For each file an "Item" div is appended to ``structMapDiv``
    and, if the file's use is a key of ``globalFileGrps``, a ``file`` element
    is added to that group.  Afterwards every sub-directory gets a "Directory"
    div and is processed by calling this function again.

    Problems (no database row, no group id, unknown use) are written to
    stderr and counted in ``sharedVariablesAcrossModules.globalErrorCount``.

    Each lock returned by ``databaseInterface.querySQL`` is released in a
    ``finally`` clause so it is not left held if reading the cursor raises.

    :param directoryPath: directory on disk to describe
    :param structMapDiv: structMap element that receives the new divs
    """
    global fileNameToFileID
    global trimStructMap
    global trimStructMapObjects
    global globalDmdSecCounter
    global globalAmdSecCounter
    global globalDigiprovMDCounter
    global dmdSecs
    global amdSecs

    subDirectories = []
    filesInThisDirectory = []
    dspaceMetsDMDID = None
    directoryContentsTuples = []
    for item in os.listdir(directoryPath):
        itemdirectoryPath = os.path.join(directoryPath, item)
        if os.path.isdir(itemdirectoryPath):
            # Handled after all files of this directory.
            subDirectories.append(item)

        elif os.path.isfile(itemdirectoryPath):
            #find original file name
            directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, baseDirectoryPathString, 1)
            sql = """SELECT Related.originalLocation AS 'derivedFromOriginalLocation', 
                            Current.originalLocation
                        FROM Files AS Current 
                        LEFT OUTER JOIN Derivations ON Current.fileUUID = Derivations.derivedFileUUID 
                        LEFT OUTER JOIN Files AS Related ON Derivations.sourceFileUUID = Related.fileUUID
                        WHERE Current.removedTime = 0 AND Current.%s = '%s' 
                            AND Current.currentLocation = '%s';""" % (fileGroupType, fileGroupIdentifier, MySQLdb.escape_string(directoryPathSTR))
            c, sqlLock = databaseInterface.querySQL(sql)
            try:
                row = c.fetchone()
                if row is None:
                    print >>sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
                    sharedVariablesAcrossModules.globalErrorCount += 1
                    continue
                while row is not None:
                    #add to files in this directory tuple list
                    derivedFromOriginalName = row[0]
                    originalLocation = row[1]
                    if derivedFromOriginalName is not None:
                        originalLocation = derivedFromOriginalName
                    #+ u"/" keeps normalized after original / is very uncommon in a file name
                    originalName = os.path.basename(originalLocation) + u"/"
                    directoryContentsTuples.append((originalName, item,))
                    row = c.fetchone()
            finally:
                sqlLock.release()

    #order files by their original name
    for originalName, item in sorted(directoryContentsTuples, key=lambda listItems: listItems[0], cmp=sharedVariablesAcrossModules.collator.compare):
        itemdirectoryPath = os.path.join(directoryPath, item)
        directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, baseDirectoryPathString, 1)

        sql = """SELECT fileUUID, fileGrpUse, fileGrpUUID, Files.transferUUID, label, originalLocation, Transfers.type 
                FROM Files
                LEFT OUTER JOIN Transfers ON Files.transferUUID = Transfers.transferUUID
                WHERE removedTime = 0 AND %s = '%s' AND Files.currentLocation = '%s';""" % (fileGroupType, fileGroupIdentifier, MySQLdb.escape_string(directoryPathSTR))
        c, sqlLock = databaseInterface.querySQL(sql)
        try:
            row = c.fetchone()
            if row is None:
                print >>sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
                sharedVariablesAcrossModules.globalErrorCount += 1
                continue
            # If several rows match, the values of the last one are kept.
            while row is not None:
                myuuid = row[0]
                use = row[1]
                fileGrpUUID = row[2]
                transferUUID = row[3]
                label = row[4]
                originalLocation = row[5]
                typeOfTransfer = row[6]
                row = c.fetchone()
        finally:
            sqlLock.release()

        # Path relative to the base directory, used in the METS from here on.
        directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, "", 1)

        if typeOfTransfer == "TRIM" and trimStructMap is None:
            # The logical structMap and its dmdSec/amdSec are created once,
            # when the first TRIM file is encountered.
            trimStructMap = etree.Element("structMap", attrib={"TYPE":"logical", "ID":"structMap_2", "LABEL":"Hierarchical arrangement"})
            trimStructMapObjects = etree.SubElement(trimStructMap, "div", attrib={"TYPE":"File", "LABEL":"objects"})

            trimDmdSec = getTrimDmdSec(baseDirectoryPath, fileGroupIdentifier)
            globalDmdSecCounter += 1
            dmdSecs.append(trimDmdSec)
            ID = "dmdSec_" + str(globalDmdSecCounter)
            trimDmdSec.set("ID", ID)
            trimStructMapObjects.set("DMDID", ID)

            trimAmdSec = etree.Element("amdSec")
            globalAmdSecCounter += 1
            amdSecs.append(trimAmdSec)
            ID = "amdSec_" + str(globalAmdSecCounter)
            trimAmdSec.set("ID", ID)

            digiprovMD = getTrimAmdSec(baseDirectoryPath, fileGroupIdentifier)
            globalDigiprovMDCounter += 1
            digiprovMD.set("ID", "digiprovMD_" + str(globalDigiprovMDCounter))

            trimAmdSec.append(digiprovMD)

            trimStructMapObjects.set("ADMID", ID)

        FILEID = "%s-%s" % (item, myuuid)
        if FILEID[0].isdigit():
            # Ids starting with a digit get an underscore prefix.
            FILEID = "_" + FILEID

        #<fptr FILEID="file1-UUID"/>
        fileDiv = etree.SubElement(structMapDiv, "div")
        if label is not None:
            fileDiv.set("LABEL", label)
        fileDiv.set("TYPE", "Item")
        newChild(fileDiv, "fptr", sets=[("FILEID", FILEID)])
        fileNameToFileID[item] = FILEID

        GROUPID = ""
        if fileGrpUUID:
            GROUPID = "Group-%s" % (fileGrpUUID)
            if use == "TRIM file metadata":
                use = "metadata"

        elif use == "original" or use == "submissionDocumentation" or use == "metadata" or use == "maildirFile":
            GROUPID = "Group-%s" % (myuuid)
            if use == "maildirFile":
                use = "original"
            if use == "original":
                DMDIDS = createDMDIDSFromCSVParsedMetadataFiles(originalLocation.replace('%transferDirectory%', "", 1))
                if DMDIDS:
                    fileDiv.set("DMDID", DMDIDS)
                if typeOfTransfer == "TRIM":
                    trimFileDiv = etree.SubElement(trimStructMapObjects, "div", attrib={"TYPE":"Item"})

                    trimFileDmdSec = getTrimFileDmdSec(baseDirectoryPath, fileGroupIdentifier, myuuid)
                    globalDmdSecCounter += 1
                    dmdSecs.append(trimFileDmdSec)
                    ID = "dmdSec_" + str(globalDmdSecCounter)
                    trimFileDmdSec.set("ID", ID)

                    trimFileDiv.set("DMDID", ID)

                    etree.SubElement(trimFileDiv, "fptr", attrib={"FILEID":FILEID})

        elif use == "preservation":
            # Group with the source file recorded in Derivations.
            sql = "SELECT * FROM Derivations WHERE derivedFileUUID = '" + myuuid + "';"
            c, sqlLock = databaseInterface.querySQL(sql)
            try:
                row = c.fetchone()
                while row is not None:
                    GROUPID = "Group-%s" % (row[1])
                    row = c.fetchone()
            finally:
                sqlLock.release()

        elif use == "license" or use == "text/ocr" or use == "DSPACEMETS":
            # Group with an original whose original location is in the same
            # directory as this file's original location.
            sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse = 'original' AND originalLocation LIKE '%s/%%'""" % (fileGroupType, fileGroupIdentifier, MySQLdb.escape_string(os.path.dirname(originalLocation)).replace("%", "\\%"))
            c, sqlLock = databaseInterface.querySQL(sql)
            try:
                row = c.fetchone()
                while row is not None:
                    GROUPID = "Group-%s" % (row[0])
                    row = c.fetchone()
            finally:
                sqlLock.release()

        elif use == "service":
            # Group with the original whose current location shares this
            # file's path up to and including the last ".".
            fileFileIDPath = itemdirectoryPath.replace(baseDirectoryPath + "objects/service/", baseDirectoryPathString + "objects/")
            objectNameExtensionIndex = fileFileIDPath.rfind(".")
            fileFileIDPath = fileFileIDPath[:objectNameExtensionIndex + 1]
            sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse = 'original' AND currentLocation LIKE '%s%%'""" % (fileGroupType, fileGroupIdentifier, MySQLdb.escape_string(fileFileIDPath.replace("%", "\\%")))
            c, sqlLock = databaseInterface.querySQL(sql)
            try:
                row = c.fetchone()
                while row is not None:
                    GROUPID = "Group-%s" % (row[0])
                    row = c.fetchone()
            finally:
                sqlLock.release()

        elif use == "TRIM container metadata":
            GROUPID = "Group-%s" % (myuuid)
            use = "metadata"

        if transferUUID:
            sql = "SELECT type FROM Transfers WHERE transferUUID = '%s';" % (transferUUID)
            rows = databaseInterface.queryAllSQL(sql)
            # Skip when the lookup returns nothing rather than raising IndexError.
            if rows and rows[0][0] == "Dspace":
                if use == "DSPACEMETS":
                    use = "submissionDocumentation"
                    admidApplyTo = None
                    if GROUPID == "": #is an AIP identifier
                        GROUPID = myuuid
                        admidApplyTo = structMapDiv.getparent()

                    LABEL = "mets.xml-%s" % (GROUPID)
                    dmdSec, ID = createMDRefDMDSec(LABEL, itemdirectoryPath, directoryPathSTR)
                    dmdSecs.append(dmdSec)
                    if admidApplyTo is not None:
                        admidApplyTo.set("DMDID", ID)
                    else:
                        # Set on this directory's original files after the loop.
                        dspaceMetsDMDID = ID

        if GROUPID == "":
            sharedVariablesAcrossModules.globalErrorCount += 1
            print >>sys.stderr, "No groupID for file: \"", directoryPathSTR, "\""

        if use not in globalFileGrps:
            print >>sys.stderr, "Invalid use: \"%s\"" % (use)
            sharedVariablesAcrossModules.globalErrorCount += 1
        else:
            fileElem = newChild(globalFileGrps[use], "file", sets=[("ID", FILEID), ("GROUPID", GROUPID)])
            if use == "original":
                filesInThisDirectory.append(fileElem)
            #<Flocat xlink:href="objects/file1-UUID" locType="other" otherLocType="system"/>
            newChild(fileElem, "FLocat", sets=[(xlinkBNS + "href", directoryPathSTR), ("LOCTYPE", "OTHER"), ("OTHERLOCTYPE", "SYSTEM")])
            if includeAmdSec:
                AMD, ADMID = getAMDSec(myuuid, directoryPathSTR, use, fileGroupType, fileGroupIdentifier, transferUUID, itemdirectoryPath, typeOfTransfer)
                amdSecs.append(AMD)
                fileElem.set("ADMID", ADMID)

    if dspaceMetsDMDID is not None:
        for fileElem in filesInThisDirectory:
            fileElem.set("DMDID", dspaceMetsDMDID)

    for item in sorted(subDirectories, cmp=sharedVariablesAcrossModules.collator.compare):
        itemdirectoryPath = os.path.join(directoryPath, item)
        directoryDiv = newChild(structMapDiv, "div", sets=[("TYPE", "Directory"), ("LABEL", item)])
        DMDIDS = createDMDIDSFromCSVParsedMetadataDirectories(itemdirectoryPath.replace(baseDirectoryPath, "", 1))
        if DMDIDS:
            directoryDiv.set("DMDID", DMDIDS)
        createFileSec(itemdirectoryPath, directoryDiv)
def createFileSec(directoryPath, parentDiv, baseDirectoryPath, baseDirectoryName, fileGroupIdentifier, fileGroupType, includeAmdSec=True):
    """
    Creates fileSec and structMap entries for files on disk recursively.

    A "Directory" div for directoryPath is appended to parentDiv.  Entries of
    the directory are visited in sorted order: sub-directories recurse, files
    are looked up as File objects by their current location and get an "Item"
    div plus, when their use is a key of globalFileGrps, a file element in
    that file group.  Files without a File object, without a group id or with
    an unknown use are reported on stderr and counted in
    sharedVariablesAcrossModules.globalErrorCount.

    :param directoryPath: Path to recursively traverse and create METS entries for
    :param parentDiv: structMap div to attach created children to
    :param baseDirectoryPath: Leading path that is stripped from file paths written to the METS
    :param baseDirectoryName: Replacement for baseDirectoryPath when building the currentlocation used to look files up
    :param fileGroupIdentifier: Value the files must have in the field named by fileGroupType
    :param fileGroupType: Name of the File field used to select the files
    :param includeAmdSec: If True, creates amdSecs for the files
    :return: The div created for directoryPath, or None if the directory could not be listed
    """
    global fileNameToFileID
    global trimStructMap
    global trimStructMapObjects
    global globalDmdSecCounter
    global globalAmdSecCounter
    global globalDigiprovMDCounter
    global dmdSecs
    global amdSecs

    filesInThisDirectory = []
    dspaceMetsDMDID = None
    try:
        directoryContents = sorted(os.listdir(directoryPath))
    except os.error:
        # Directory doesn't exist
        print >> sys.stderr, directoryPath, "doesn't exist"
        return None

    structMapDiv = etree.SubElement(parentDiv, ns.metsBNS + 'div', TYPE='Directory', LABEL=os.path.basename(directoryPath))

    DMDIDS = createDMDIDsFromCSVMetadata(directoryPath.replace(baseDirectoryPath, "", 1))
    if DMDIDS:
        structMapDiv.set("DMDID", DMDIDS)

    for item in directoryContents:
        itemdirectoryPath = os.path.join(directoryPath, item)
        if os.path.isdir(itemdirectoryPath):
            createFileSec(itemdirectoryPath, structMapDiv, baseDirectoryPath, baseDirectoryName, fileGroupIdentifier, fileGroupType, includeAmdSec)
        elif os.path.isfile(itemdirectoryPath):
            # Location as stored in the database: base path replaced by its name.
            directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, baseDirectoryName, 1)

            kwargs = {
                "removedtime__isnull": True,
                fileGroupType: fileGroupIdentifier,
                "currentlocation": directoryPathSTR
            }
            try:
                f = File.objects.get(**kwargs)
            except File.DoesNotExist:
                print >>sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
                sharedVariablesAcrossModules.globalErrorCount += 1
                continue

            myuuid = f.uuid
            use = f.filegrpuse
            fileGrpUUID = f.filegrpuuid
            transferUUID = f.transfer_id
            label = f.label
            originalLocation = f.originallocation
            typeOfTransfer = f.transfer.type if f.transfer else None

            # From here on the path is relative to the base directory.
            directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, "", 1)

            if typeOfTransfer == "TRIM" and trimStructMap is None:
                # First TRIM file: create the logical structMap together with
                # its dmdSec and amdSec.
                trimStructMap = etree.Element(ns.metsBNS + "structMap", attrib={"TYPE":"logical", "ID":"structMap_2", "LABEL":"Hierarchical arrangement"})
                trimStructMapObjects = etree.SubElement(trimStructMap, ns.metsBNS + "div", attrib={"TYPE":"File", "LABEL":"objects"})

                trimDmdSec = getTrimDmdSec(baseDirectoryPath, fileGroupIdentifier)
                globalDmdSecCounter += 1
                dmdSecs.append(trimDmdSec)
                ID = "dmdSec_" + str(globalDmdSecCounter)
                trimDmdSec.set("ID", ID)
                trimStructMapObjects.set("DMDID", ID)

                trimAmdSec = etree.Element(ns.metsBNS + "amdSec")
                globalAmdSecCounter += 1
                amdSecs.append(trimAmdSec)
                ID = "amdSec_" + str(globalAmdSecCounter)
                trimAmdSec.set("ID", ID)

                digiprovMD = getTrimAmdSec(baseDirectoryPath, fileGroupIdentifier)
                globalDigiprovMDCounter += 1
                digiprovMD.set("ID", "digiprovMD_" + str(globalDigiprovMDCounter))

                trimAmdSec.append(digiprovMD)

                trimStructMapObjects.set("ADMID", ID)

            fileId = "file-%s" % (myuuid, )

            #<fptr FILEID="file-<UUID>" LABEL="filename.ext">
            # Fall back to the file name when the database has no label.
            label = label or item
            fileDiv = etree.SubElement(structMapDiv, ns.metsBNS + "div", LABEL=label, TYPE='Item')
            etree.SubElement(fileDiv, ns.metsBNS + 'fptr', FILEID=fileId)
            fileNameToFileID[item] = fileId

            GROUPID = ""
            if fileGrpUUID:
                GROUPID = "Group-%s" % (fileGrpUUID)
                if use == "TRIM file metadata":
                    use = "metadata"

            elif use in ("original", "submissionDocumentation", "metadata", "maildirFile"):
                GROUPID = "Group-%s" % (myuuid)
                if use == "maildirFile":
                    use = "original"
                if use == "original":
                    DMDIDS = createDMDIDsFromCSVMetadata(originalLocation.replace('%transferDirectory%', "", 1))
                    if DMDIDS:
                        fileDiv.set("DMDID", DMDIDS)
                    if typeOfTransfer == "TRIM":
                        trimFileDiv = etree.SubElement(trimStructMapObjects, ns.metsBNS + "div", attrib={"TYPE":"Item"})

                        trimFileDmdSec = getTrimFileDmdSec(baseDirectoryPath, fileGroupIdentifier, myuuid)
                        globalDmdSecCounter += 1
                        dmdSecs.append(trimFileDmdSec)
                        ID = "dmdSec_" + str(globalDmdSecCounter)
                        trimFileDmdSec.set("ID", ID)

                        trimFileDiv.set("DMDID", ID)

                        etree.SubElement(trimFileDiv, ns.metsBNS + "fptr", FILEID=fileId)

            # Dspace transfers are treated specially, but some of these fileGrpUses
            # may be encountered in other types
            elif typeOfTransfer == "Dspace" and use in ("license", "text/ocr", "DSPACEMETS"):
                kwargs = {
                    "removedtime__isnull": True,
                    fileGroupType: fileGroupIdentifier,
                    "filegrpuse": "original",
                    "originallocation__startswith": os.path.dirname(originalLocation)
                }
                try:
                    # Kept in its own name so ``f`` keeps referring to the
                    # file currently being processed.
                    originalFile = File.objects.get(**kwargs)
                except (File.DoesNotExist, File.MultipleObjectsReturned):
                    # No unique original: GROUPID stays empty and is
                    # reported by the check further down.
                    pass
                else:
                    GROUPID = 'Group-' + originalFile.uuid

            elif use in ("preservation", "text/ocr"):
                # Group with the file this one was derived from.
                d = Derivation.objects.get(derived_file_id=myuuid)
                GROUPID = "Group-" + d.source_file_id

            elif use == "service":
                # Group with the original whose current location starts with
                # this file's path up to and including the last ".".
                fileFileIDPath = itemdirectoryPath.replace(baseDirectoryPath + "objects/service/", baseDirectoryName + "objects/")
                objectNameExtensionIndex = fileFileIDPath.rfind(".")
                fileFileIDPath = fileFileIDPath[:objectNameExtensionIndex + 1]

                kwargs = {
                    "removedtime__isnull": True,
                    fileGroupType: fileGroupIdentifier,
                    "filegrpuse": "original",
                    "currentlocation__startswith": fileFileIDPath
                }
                originalFile = File.objects.get(**kwargs)
                GROUPID = "Group-" + originalFile.uuid

            elif use == "TRIM container metadata":
                GROUPID = "Group-%s" % (myuuid)
                use = "metadata"

            if transferUUID:
                t = Transfer.objects.get(uuid=transferUUID)

                if t.type == "Dspace" and use == "DSPACEMETS":
                    use = "submissionDocumentation"
                    admidApplyTo = None
                    if GROUPID == "": #is an AIP identifier
                        GROUPID = myuuid
                        admidApplyTo = structMapDiv.getparent()

                    metsLabel = "mets.xml-%s" % (GROUPID)
                    dspace_dmdsecs = createDSpaceDMDSec(metsLabel, itemdirectoryPath, directoryPathSTR)
                    if dspace_dmdsecs:
                        dmdSecs.extend(dspace_dmdsecs.values())
                        ids = ' '.join(dspace_dmdsecs.keys())
                        if admidApplyTo is not None:
                            admidApplyTo.set("DMDID", ids)
                        else:
                            # Set on this directory's original files after the loop.
                            dspaceMetsDMDID = ids

            if GROUPID == "":
                sharedVariablesAcrossModules.globalErrorCount += 1
                print >>sys.stderr, "No groupID for file: \"", directoryPathSTR, "\""

            if use not in globalFileGrps:
                print >>sys.stderr, "Invalid use: \"%s\"" % (use)
                sharedVariablesAcrossModules.globalErrorCount += 1
            else:
                file_elem = etree.SubElement(globalFileGrps[use], ns.metsBNS + "file", ID=fileId, GROUPID=GROUPID)
                if use == "original":
                    filesInThisDirectory.append(file_elem)
                #<Flocat xlink:href="objects/file1-UUID" locType="other" otherLocType="system"/>
                newChild(file_elem, ns.metsBNS + "FLocat", sets=[(ns.xlinkBNS + "href", directoryPathSTR), ("LOCTYPE", "OTHER"), ("OTHERLOCTYPE", "SYSTEM")])
                if includeAmdSec:
                    AMD, ADMID = getAMDSec(myuuid, directoryPathSTR, use, fileGroupType, fileGroupIdentifier, transferUUID, itemdirectoryPath, typeOfTransfer, baseDirectoryPath)
                    amdSecs.append(AMD)
                    file_elem.set("ADMID", ADMID)

    if dspaceMetsDMDID is not None:
        for file_elem in filesInThisDirectory:
            file_elem.set("DMDID", dspaceMetsDMDID)

    return structMapDiv
Beispiel #4
0
def createFileSec(directoryPath, parentDiv, baseDirectoryPath, baseDirectoryName, fileGroupIdentifier, fileGroupType, includeAmdSec=True):
    """
    Creates fileSec and structMap entries for files on disk recursively.

    For every sub-directory the function calls itself; for every regular file
    it looks up the matching File row, adds a ``<div TYPE="Item">``/``<fptr>``
    pair to the structMap, works out the file's GROUPID and adds a ``<file>``
    element (with ``<FLocat>``) to the fileGrp selected by the file's use.

    Side effects on module-level state: appends to ``dmdSecs`` and ``amdSecs``,
    increments ``globalDmdSecCounter``, ``globalAmdSecCounter`` and
    ``globalDigiprovMDCounter``, fills ``fileNameToFileID``, lazily creates
    ``trimStructMap``/``trimStructMapObjects`` for TRIM transfers, adds
    children to the elements in ``globalFileGrps`` and increments
    ``sharedVariablesAcrossModules.globalErrorCount`` on recoverable errors.

    :param directoryPath: Path to recursively traverse and create METS entries for
    :param parentDiv: structMap div to attach created children to
    :param baseDirectoryPath: SIP path
    :param baseDirectoryName: Name of the %var% for the SIP path
    :param fileGroupIdentifier: SIP UUID
    :param fileGroupType: Name of the foreign key field linking to SIP UUID in files.
    :param includeAmdSec: If True, creates amdSecs for the files
    :return: The structMap div created for ``directoryPath``, or None if the
        directory could not be listed.
    """
    global fileNameToFileID
    global trimStructMap
    global trimStructMapObjects
    global globalDmdSecCounter
    global globalAmdSecCounter
    global globalDigiprovMDCounter
    global dmdSecs
    global amdSecs

    # <file> elements with use "original" created for this directory; they
    # receive the DSpace METS DMDID after the loop, if one was found.
    filesInThisDirectory = []
    dspaceMetsDMDID = None
    try:
        # Entries are processed in sorted name order.
        directoryContents = sorted(os.listdir(directoryPath))
    except os.error:
        # Directory doesn't exist
        print(directoryPath, "doesn't exist", file=sys.stderr)
        return

    structMapDiv = etree.SubElement(parentDiv, ns.metsBNS + 'div', TYPE='Directory', LABEL=os.path.basename(directoryPath))

    # CSV metadata is looked up by the directory path with the first
    # occurrence of baseDirectoryPath removed.
    # NOTE(review): presumably returns a DMDID string, falsy when there is no
    # matching metadata - confirm against createDMDIDsFromCSVMetadata.
    DMDIDS = createDMDIDsFromCSVMetadata(directoryPath.replace(baseDirectoryPath, "", 1))
    if DMDIDS:
        structMapDiv.set("DMDID", DMDIDS)

    for item in directoryContents:
        itemdirectoryPath = os.path.join(directoryPath, item)
        if os.path.isdir(itemdirectoryPath):
            # Recurse; the sub-directory's div is attached under this directory's div.
            createFileSec(itemdirectoryPath, structMapDiv, baseDirectoryPath, baseDirectoryName, fileGroupIdentifier, fileGroupType, includeAmdSec)
        elif os.path.isfile(itemdirectoryPath):
            # Setup variables for creating file metadata
            DMDIDS = ""
            # For the database lookup the on-disk base path is swapped for
            # baseDirectoryName.
            directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, baseDirectoryName, 1)

            # fileGroupType is used as the name of the filter field.
            kwargs = {
                "removedtime__isnull": True,
                fileGroupType: fileGroupIdentifier,
                "currentlocation": directoryPathSTR
            }
            try:
                f = File.objects.get(**kwargs)
            except File.DoesNotExist:
                # Only the "no match" case is handled here; the file is
                # counted as an error and skipped.
                print("No uuid for file: \"", directoryPathSTR, "\"", file=sys.stderr)
                sharedVariablesAcrossModules.globalErrorCount += 1
                continue

            use = f.filegrpuse
            label = f.label
            typeOfTransfer = f.transfer.type if f.transfer else None

            # From here on directoryPathSTR has the base path removed; this
            # form is used for the FLocat href and passed to getAMDSec and
            # createDSpaceDMDSec.
            directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, "", 1)

            # Special TRIM processing
            # Runs once: the guard on trimStructMap being None means the
            # second structMap and its dmdSec/amdSec are only built for the
            # first TRIM file encountered.
            if typeOfTransfer == "TRIM" and trimStructMap is None:
                trimStructMap = etree.Element(ns.metsBNS + "structMap", attrib={"TYPE": "logical", "ID": "structMap_2", "LABEL": "Hierarchical arrangement"})
                trimStructMapObjects = etree.SubElement(trimStructMap, ns.metsBNS + "div", attrib={"TYPE": "File", "LABEL": "objects"})

                trimDmdSec = getTrimDmdSec(baseDirectoryPath, fileGroupIdentifier)
                globalDmdSecCounter += 1
                dmdSecs.append(trimDmdSec)
                ID = "dmdSec_" + globalDmdSecCounter.__str__()
                trimDmdSec.set("ID", ID)
                trimStructMapObjects.set("DMDID", ID)

                trimAmdSec = etree.Element(ns.metsBNS + "amdSec")
                globalAmdSecCounter += 1
                amdSecs.append(trimAmdSec)
                # ID is reused: from here it holds the amdSec identifier.
                ID = "amdSec_" + globalAmdSecCounter.__str__()
                trimAmdSec.set("ID", ID)

                digiprovMD = getTrimAmdSec(baseDirectoryPath, fileGroupIdentifier)
                globalDigiprovMDCounter += 1
                digiprovMD.set("ID", "digiprovMD_" + str(globalDigiprovMDCounter))

                trimAmdSec.append(digiprovMD)

                # Links the "objects" div to the amdSec created just above.
                trimStructMapObjects.set("ADMID", ID)

            # Create <div TYPE="Item"> and child <fptr>
            # <fptr FILEID="file-<UUID>" LABEL="filename.ext">
            fileId = "file-{}".format(f.uuid)
            # Fall back to the on-disk file name when the File row has no label.
            label = item if not label else label
            fileDiv = etree.SubElement(structMapDiv, ns.metsBNS + "div", LABEL=label, TYPE='Item')
            etree.SubElement(fileDiv, ns.metsBNS + 'fptr', FILEID=fileId)
            # NOTE(review): keyed by the bare file name, so files with the
            # same name in different directories overwrite each other here.
            fileNameToFileID[item] = fileId

            # Determine fileGrp @GROUPID based on the file's fileGrpUse and transfer type
            # GROUPID stays "" if no branch below can determine a group.
            GROUPID = ""
            if f.filegrpuuid:
                # GROUPID was determined elsewhere
                GROUPID = "Group-%s" % (f.filegrpuuid)
                if use == "TRIM file metadata":
                    use = "metadata"

            elif use in ("original", "submissionDocumentation", "metadata", "maildirFile"):
                # These files are in a group defined by themselves
                GROUPID = "Group-%s" % (f.uuid)
                # maildirFile entries are emitted in the "original" fileGrp.
                if use == "maildirFile":
                    use = "original"
                # Check for CSV-based Dublincore dmdSec
                if use == "original":
                    DMDIDS = createDMDIDsFromCSVMetadata(f.originallocation.replace('%transferDirectory%', "", 1))
                    if DMDIDS:
                        fileDiv.set("DMDID", DMDIDS)
                    # More special TRIM processing
                    # Each original file gets its own Item div, dmdSec and
                    # fptr in the TRIM structMap created above.
                    if typeOfTransfer == "TRIM":
                        trimFileDiv = etree.SubElement(trimStructMapObjects, ns.metsBNS + "div", attrib={"TYPE": "Item"})

                        trimFileDmdSec = getTrimFileDmdSec(baseDirectoryPath, fileGroupIdentifier, f.uuid)
                        globalDmdSecCounter += 1
                        dmdSecs.append(trimFileDmdSec)
                        ID = "dmdSec_" + globalDmdSecCounter.__str__()
                        trimFileDmdSec.set("ID", ID)

                        trimFileDiv.set("DMDID", ID)

                        etree.SubElement(trimFileDiv, ns.metsBNS + "fptr", FILEID=fileId)

            elif typeOfTransfer == "Dspace" and (use in ("license", "text/ocr", "DSPACEMETS")):
                # Dspace transfers are treated specially, but some of these fileGrpUses may be encountered in other types
                # Group with the first "original" file whose original location
                # starts with this file's original directory; if there is
                # none, GROUPID is left empty.
                kwargs = {
                    "removedtime__isnull": True,
                    fileGroupType: fileGroupIdentifier,
                    "filegrpuse": "original",
                    "originallocation__startswith": os.path.dirname(f.originallocation)
                }
                original_file = File.objects.filter(**kwargs).first()
                if original_file is not None:
                    GROUPID = 'Group-' + original_file.uuid

            elif use in ("preservation", "text/ocr"):
                # Derived files should be in the original file's group
                # NOTE(review): .get() is not wrapped in a try here, so a
                # missing or duplicated Derivation row propagates as an
                # exception out of this function.
                d = Derivation.objects.get(derived_file_id=f.uuid)
                GROUPID = "Group-" + d.source_file_id

            elif use == "service":
                # Service files are in the original file's group
                # Build the expected location prefix of the original: swap
                # "objects/service/" for "objects/" and cut everything after
                # the last "." (the dot itself is kept).
                # NOTE(review): if the path contains no ".", rfind returns -1
                # and the prefix becomes the empty string.
                fileFileIDPath = itemdirectoryPath.replace(baseDirectoryPath + "objects/service/", baseDirectoryName + "objects/")
                objectNameExtensionIndex = fileFileIDPath.rfind(".")
                fileFileIDPath = fileFileIDPath[:objectNameExtensionIndex + 1]

                kwargs = {
                    "removedtime__isnull": True,
                    fileGroupType: fileGroupIdentifier,
                    "filegrpuse": "original",
                    "currentlocation__startswith": fileFileIDPath
                }
                # NOTE(review): .get() failures (no match or several matches)
                # are not handled here and propagate to the caller.
                original_file = File.objects.get(**kwargs)
                GROUPID = "Group-" + original_file.uuid

            elif use == "TRIM container metadata":
                GROUPID = "Group-%s" % (f.uuid)
                use = "metadata"

            # Special DSPACEMETS processing
            if f.transfer and f.transfer.type == "Dspace" and use == "DSPACEMETS":
                # The DSpace mets.xml is emitted in the submissionDocumentation fileGrp.
                use = "submissionDocumentation"
                admidApplyTo = None
                if GROUPID == "":  # is an AIP identifier
                    # No original file was found to group with: the file's own
                    # UUID is used (without the "Group-" prefix) and the
                    # DMDIDs go on the parent of this directory's div.
                    GROUPID = f.uuid
                    admidApplyTo = structMapDiv.getparent()

                label = "mets.xml-%s" % (GROUPID)
                dspace_dmdsecs = createDSpaceDMDSec(label, itemdirectoryPath, directoryPathSTR)
                # dspace_dmdsecs is used as a mapping: its values are added to
                # dmdSecs and its keys are joined into the DMDID attribute value.
                if dspace_dmdsecs:
                    dmdSecs.extend(dspace_dmdsecs.values())
                    ids = ' '.join(dspace_dmdsecs.keys())
                    if admidApplyTo is not None:
                        admidApplyTo.set("DMDID", ids)
                    else:
                        # Deferred: applied to this directory's original
                        # files after the loop.
                        dspaceMetsDMDID = ids

            if GROUPID == "":
                # Counted as an error, but processing continues: the <file>
                # element below is still created with an empty GROUPID.
                sharedVariablesAcrossModules.globalErrorCount += 1
                print("No groupID for file: \"", directoryPathSTR, "\"", file=sys.stderr)

            if use not in globalFileGrps:
                # No fileGrp element exists for this use; no <file> entry is
                # written for the file.
                print('Invalid use: "%s"' % (use), file=sys.stderr)
                sharedVariablesAcrossModules.globalErrorCount += 1
            else:
                file_elem = etree.SubElement(globalFileGrps[use], ns.metsBNS + "file", ID=fileId, GROUPID=GROUPID)
                if use == "original":
                    filesInThisDirectory.append(file_elem)
                # <FLocat xlink:href="objects/file1-UUID" LOCTYPE="OTHER" OTHERLOCTYPE="SYSTEM"/>
                newChild(file_elem, ns.metsBNS + "FLocat", sets=[(ns.xlinkBNS + "href", directoryPathSTR), ("LOCTYPE", "OTHER"), ("OTHERLOCTYPE", "SYSTEM")])
                if includeAmdSec:
                    # getAMDSec returns the amdSec element and its ID; the ID
                    # is referenced from the <file> element.
                    AMD, ADMID = getAMDSec(f.uuid, directoryPathSTR, use, fileGroupType, fileGroupIdentifier, f.transfer_id, itemdirectoryPath, typeOfTransfer, baseDirectoryPath)
                    amdSecs.append(AMD)
                    file_elem.set("ADMID", ADMID)

    # Link every "original" file of this directory to the dmdSec(s) taken
    # from the DSpace METS file found in the same directory.
    if dspaceMetsDMDID is not None:
        for file_elem in filesInThisDirectory:
            file_elem.set("DMDID", dspaceMetsDMDID)

    return structMapDiv