Ejemplo n.º 1
0
def bridgeWatchedDirectories():
    ""
    global allLinks
    sql = "SELECT watchedDirectoryPath, startingLink FROM WatchedDirectories Join MicroServiceChains ON WatchedDirectories.chain = MicroServiceChains.pk;"
    rows = databaseInterface.queryAllSQL(sql)
    for row in rows:
        countOfSources = 0
        watchedDirectoryPath, startingLink = row
        sql = "SELECT MicroServiceChainLinks.pk FROM StandardTasksConfigs JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = StandardTasksConfigs.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE ( execute LIKE 'moveSIP%%' OR execute LIKE 'moveTransfer%%') AND taskType = '36b2e239-4a57-4aa5-8ebc-7a29139baca6' AND arguments like '%%%s%%';" % (watchedDirectoryPath.replace('%', '\%'))
        rows2 = databaseInterface.queryAllSQL(sql)
        for row2 in rows2:
            microServiceChainLink = row2[0]
            addArrow(microServiceChainLink, startingLink, color="yellow")
            countOfSources +=1
        sql = "SELECT MicroServiceChainLinks.pk FROM StandardTasksConfigs JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = StandardTasksConfigs.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE ( execute LIKE 'moveSIP%%' OR execute LIKE 'moveTransfer%%') AND taskType = '36b2e239-4a57-4aa5-8ebc-7a29139baca6' AND arguments like '%%%s%%';" % (watchedDirectoryPath.replace('%watchDirectoryPath%', '%sharedPath%watchedDirectories/', 1).replace('%', '\%'))
        rows2 = databaseInterface.queryAllSQL(sql)
        for row2 in rows2:
            microServiceChainLink = row2[0]
            addArrow(microServiceChainLink, startingLink, color="yellow")
            countOfSources +=1
            
        if countOfSources == 0:
            print "no sources for watched directory: ", watchedDirectoryPath 
            
    return
def index_transfer_files(conn, uuid, pathToTransfer, index, type):
    """Index every regular file found under a transfer directory.

    For each path yielded by list_files_in_dir(pathToTransfer) that is a
    regular file, a document describing the file is handed to
    conn.index(document, index, type).

    conn -- object exposing index(document, index, type); presumably a search
            index connection (TODO confirm against callers).
    uuid -- transfer UUID; used for the accession ID and file UUID lookups and
            stored in the document under the 'sipuuid' key.
    pathToTransfer -- directory whose files are indexed; it is also stripped
            from each file path to build the '%transferDirectory%objects...'
            location used for the Files lookup.
    index, type -- forwarded unchanged to conn.index().

    Returns the number of documents indexed.
    """
    filesIndexed = 0
    ingest_date = str(datetime.datetime.today())[0:10]
    create_time = time.time()
    escaped_uuid = MySQLdb.escape_string(uuid)

    # get accessionId from transfers table using UUID
    accession_id = ''
    sql = "SELECT accessionID from Transfers WHERE transferUUID='" + escaped_uuid + "'"
    rows = databaseInterface.queryAllSQL(sql)
    if len(rows) > 0:
        accession_id = rows[0][0]

    # The dashboard UUID is the same for every document, so it is looked up
    # once instead of once per file.
    origin = getDashboardUUID()

    for filepath in list_files_in_dir(pathToTransfer):
        if not os.path.isfile(filepath):
            continue

        relative_path = '%transferDirectory%objects' + filepath.replace(pathToTransfer, '')

        sql = "SELECT fileUUID FROM Files WHERE currentLocation='" + MySQLdb.escape_string(relative_path) + "' AND transferUUID='" + escaped_uuid + "'"
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows) > 0:
            file_uuid = rows[0][0]
        else:
            # Files with no database record are still indexed, with no UUID.
            file_uuid = ''

        indexData = {
          'filepath'     : filepath,
          'filename'     : os.path.basename(filepath),
          'fileuuid'     : file_uuid,
          'sipuuid'      : uuid,
          'accessionid'  : accession_id,
          'status'       : '',
          'origin'       : origin,
          'ingestdate'   : ingest_date,
          'created'      : create_time
        }

        fileExtension = os.path.splitext(filepath)[1]
        if fileExtension != '':
            # Stored lower-cased and without the leading dot.
            indexData['fileExtension'] = fileExtension[1:].lower()

        conn.index(indexData, index, type)

        filesIndexed = filesIndexed + 1

    return filesIndexed
def getstatuteInformation(pk, parent):
    """Append one <statuteInformation> element to parent per matching row.

    pk -- numeric key of the rights statement (formatted with %d).
    parent -- etree element that receives the new children.

    Each element gets the jurisdiction, citation and determination date,
    followed by any statuteNote rows, any statuteDocumentationIdentifier
    rows, and finally a statuteApplicableDates element when at least one of
    the start date, end date or open-ended flag is set.
    """
    statuteQuery = "SELECT pk, statuteJurisdiction, statuteCitation, statuteInformationDeterminationDate, statuteapplicablestartdate, statuteapplicableenddate, statuteApplicableEndDateOpen FROM RightsStatementStatuteInformation WHERE fkRightsStatement = %d" % (pk)
    for statuteRow in databaseInterface.queryAllSQL(statuteQuery):
        statutePK, jurisdiction, citation, determinationDate, startDate, endDate, endDateOpen = statuteRow

        statuteInformation = etree.SubElement(parent, "statuteInformation")
        etree.SubElement(statuteInformation, "statuteJurisdiction").text = jurisdiction
        etree.SubElement(statuteInformation, "statuteCitation").text = citation
        etree.SubElement(statuteInformation, "statuteInformationDeterminationDate").text = formatDate(determinationDate)

        # statuteNote is repeatable: one element per note row.
        noteQuery = "SELECT statuteNote FROM RightsStatementStatuteInformationNote WHERE fkRightsStatementStatuteInformation = %d;" % (statutePK)
        for noteRow in databaseInterface.queryAllSQL(noteQuery):
            etree.SubElement(statuteInformation, "statuteNote").text = noteRow[0]

        identifierQuery = """SELECT statuteDocumentationIdentifierType, statuteDocumentationIdentifierValue, statuteDocumentationIdentifierRole FROM RightsStatementStatuteDocumentationIdentifier WHERE fkRightsStatementStatuteInformation = %s """ % (statutePK)
        for identifierRow in databaseInterface.queryAllSQL(identifierQuery):
            identifier = etree.SubElement(statuteInformation, "statuteDocumentationIdentifier")
            etree.SubElement(identifier, "statuteDocumentationIdentifierType").text = identifierRow[0]
            etree.SubElement(identifier, "statuteDocumentationIdentifierValue").text = identifierRow[1]
            etree.SubElement(identifier, "statuteDocumentationRole").text = identifierRow[2]

        if not (startDate or endDate or endDateOpen):
            continue

        applicableDates = etree.SubElement(statuteInformation, "statuteApplicableDates")
        if startDate:
            etree.SubElement(applicableDates, "startDate").text = formatDate(startDate)
        # The open-ended flag takes precedence over a concrete end date.
        if endDateOpen:
            etree.SubElement(applicableDates, "endDate").text = "OPEN"
        elif endDate:
            etree.SubElement(applicableDates, "endDate").text = formatDate(endDate)
Ejemplo n.º 4
0
    def autoUpdateFPR(self):
        self.maxLastUpdate = self.getMaxLastUpdate()
        maxLastUpdateAtStart = self.maxLastUpdate
        databaseInterface.runSQL("SET foreign_key_checks = 0;")
        for x in [
            ("CommandRelationships", self.fprserver + "/fpr/api/v1/CommandRelationship/"),
            ("FileIDsBySingleID", self.fprserver + "/fpr/api/v1/FileIDsBySingleID/"),
            ("FileIDs", self.fprserver + "/fpr/api/v1/FileID/"),
            ("Commands", self.fprserver + "/fpr/api/v1/Command/"),
            ("CommandTypes", self.fprserver + "/fpr/api/v1/CommandType/"),
            ("CommandClassifications", self.fprserver + "/fpr/api/v1/CommandClassification/"),
            ("FileIDTypes", self.fprserver + "/fpr/api/v1/FileIDType/"),
        ]:
            table, url = x
            # params = {"format":"json", "order_by":"lastmodified", "lastmodified__gte":maxLastUpdateAtStart, "limit":"0"}
            params = {
                "format": "json",
                "order_by": "lastmodified",
                "lastmodified__gte": maxLastUpdateAtStart,
                "limit": "0",
            }
            entries = getFromRestAPI(url, params, verbose=False, auth=None)
            # print "test", entries
            for entry in entries:
                # print table, entry

                # check if it already exists
                sql = """SELECT pk FROM %s WHERE pk = '%s'""" % (table, entry["uuid"])
                if databaseInterface.queryAllSQL(sql):
                    # pass
                    continue

                if not "replaces" in entry:
                    print >> sys.stderr, "Required entry 'replaces' missing."
                    print entry
                    # continue
                    exit(3)

                # If updating a disabled entry, it will continue to be disabled.
                if entry["replaces"] != None:
                    sql = """SELECT enabled FROM %s WHERE pk = '%s';""" % (table, entry["replaces"])
                    enabled = databaseInterface.queryAllSQL(sql)[0][0]
                    if not enabled:
                        entry["enabled"] = 0
                    sql = """UPDATE %s SET enabled=FALSE WHERE pk = '%s';""" % (table, entry["replaces"])
                    databaseInterface.runSQL(sql)

                self.create(table, entry)

        addLinks()
        databaseInterface.runSQL("SET foreign_key_checks = 1;")
        if self.maxLastUpdate != maxLastUpdateAtStart:
            self.setMaxLastUpdate(self.maxLastUpdate)
Ejemplo n.º 5
0
def getAgentForFileUUID(fileUUID):
    """Return the 'activeAgent' unit variable for the file's SIP or transfer.

    The file's sipUUID and transferUUID are read from Files. The SIP is
    consulted first; the transfer is consulted only when no (non-empty) agent
    was found for the SIP. Returns None when neither has the variable.
    Raises IndexError when fileUUID has no row in Files.
    """
    fileRows = databaseInterface.queryAllSQL("""SELECT sipUUID, transferUUID FROM Files WHERE fileUUID = '%s';""" % (fileUUID))
    sipUUID, transferUUID = fileRows[0]

    agent = None
    for unitType, unitUUID in (('SIP', sipUUID), ("Transfer", transferUUID)):
        # Stop looking once an agent is known; skip units the file is not in.
        if agent or not unitUUID:
            continue
        variableRows = databaseInterface.queryAllSQL("""SELECT variableValue FROM UnitVariables WHERE unitType = '%s' AND unitUUID = '%s' AND variable = '%s';""" % (unitType, unitUUID, "activeAgent"))
        if len(variableRows):
            agent = "%s" % (variableRows[0])
    return agent
Ejemplo n.º 6
0
def parseIdsSimple(FITS_XML, fileUUID):
    #simpleIdPlaces = [(table, tool, iter)]
    simpleIdPlaces = [
        ("FileIDsByFitsDROIDMimeType", "Droid", "{http://www.nationalarchives.gov.uk/pronom/FileCollection}MimeType"),
        ("FITS DROID PUID", "Droid", "{http://www.nationalarchives.gov.uk/pronom/FileCollection}PUID"),
        ("FileIDsByFitsFfidentMimetype", "ffident", "mimetype"),
        ("FileIDsByFitsFileUtilityMimetype", "file utility", "mimetype"),
        ("FileIDsByFitsFileUtilityFormat", "file utility", "format"),
        ("FileIDsByFitsJhoveMimeType", "Jhove", "{}mimeType"),
        ("FileIDsByFitsJhoveFormat", "Jhove", "{}format")
        
    ]
    
    for toolKey, tool, iterOn in simpleIdPlaces:
        identified = []
        fileIDs = []
        for element in FITS_XML.iter("{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}tool"):
            if element.get("name") == tool:
                toolVersion = element.get("version")
                for element2 in element.getiterator(iterOn):
                    if element2.text != None:
                        if element2.text in identified:
                            continue
                        identified.append(element2.text)
                        sql = """SELECT fileID FROM FileIDsBySingleID WHERE tool = '%s' AND toolVersion='%s' AND id = '%s' AND FileIDsBySingleID.enabled = TRUE;""" % (toolKey, toolVersion, element2.text)
                        fileIDS = databaseInterface.queryAllSQL(sql)
                        if not fileIDS:
                            print "No Archivematica entry found for:", toolKey, toolVersion, element2.text
                        for fileID in fileIDS:
                            sql = """INSERT INTO FilesIdentifiedIDs (fileUUID, fileID) VALUES ('%s', '%s');""" % (fileUUID, fileID[0])
                            databaseInterface.runSQL(sql)
        if fileIDs == [] and False:
            print >>sys.stderr, "No archivematica id for: ", tool, iterOn, element2.text
                
                
    for element in FITS_XML.findall(".//{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}identity[@mimetype]"):
        format = element.get("mimetype")
        if format:
            sql = """SELECT FileIDsBySingleID.fileID, FileIDs.fileIDType, FileIDsBySingleID.id FROM FileIDsBySingleID JOIN FileIDs ON FileIDsBySingleID.fileID = FileIDs.pk WHERE FileIDs.fileIDType = 'c26227f7-fca8-4d98-9d8e-cfab86a2dd0a' AND FileIDsBySingleID.id = '%s' AND FileIDsBySingleID.enabled = TRUE AND FileIDs.enabled = TRUE;""" % (format)
            fileIDS = databaseInterface.queryAllSQL(sql)
            for fileID in fileIDS:
                sql = """INSERT INTO FilesIdentifiedIDs (fileUUID, fileID) VALUES ('%s', '%s');""" % (fileUUID, fileID[0])
                databaseInterface.runSQL(sql)
    for element in FITS_XML.findall(".//{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}identity[@format]"):
        format = element.get("format")
        if format:
            sql = """SELECT FileIDsBySingleID.fileID, FileIDs.fileIDType, FileIDsBySingleID.id FROM FileIDsBySingleID JOIN FileIDs ON FileIDsBySingleID.fileID = FileIDs.pk WHERE FileIDs.fileIDType = 'b0bcccfb-04bc-4daa-a13c-77c23c2bda85' AND FileIDsBySingleID.id = '%s' AND FileIDsBySingleID.enabled = TRUE AND FileIDs.enabled = TRUE;""" % (format)
            fileIDS = databaseInterface.queryAllSQL(sql)
            for fileID in fileIDS:
                sql = """INSERT INTO FilesIdentifiedIDs (fileUUID, fileID) VALUES ('%s', '%s');""" % (fileUUID, fileID[0])
                databaseInterface.runSQL(sql)
Ejemplo n.º 7
0
def addLinks():
    """Create task configs, chain links and exit codes for new command relationships.

    Every CommandRelationships row that is not yet referenced by a
    TasksConfigs row of the transcoder task type gets:
      * a TasksConfigs row described by its command classification,
      * a MicroServiceChainLinks row in the 'Normalize' group whose default
        next link comes from DefaultCommandsForClassifications (NULL if none),
      * a MicroServiceChainLinksExitCodes row for exit code 0 with no next link.

    Relationships classified as "Skipable" are ignored. A relationship with an
    unknown classification is reported on stderr and skipped, instead of
    aborting the whole run with a KeyError.
    """
    commandClassifications = {
        "3141bc6f-7f77-4809-9244-116b235e7330": "Normalize access",
        "3d1b570f-f500-4b3c-bbbc-4c58aad05c27": "Normalize preservation",
        "27c2969b-b6a0-441d-888d-85292b692064": "Normalize thumbnail",
        "5934dd0b-9f7c-4091-8607-47f519f5c095": "Skipable",
    }

    # Find all command relationships without links.
    sql = "SELECT CommandRelationships.pk, commandClassification FROM CommandRelationships WHERE CommandRelationships.pk NOT IN (SELECT taskTypePKReference FROM TasksConfigs WHERE taskType = '5e70152a-9c5b-4c17-b823-c9298c546eeb');"
    rows = databaseInterface.queryAllSQL(sql)
    for cr, cc in rows:
        if cc not in commandClassifications:
            print >>sys.stderr, "Invalid Command Classification (%s) for Command Relationship: %s" % (cc, cr)
            # Nothing sensible can be created for an unknown classification.
            continue

        taskConfigDescription = commandClassifications[cc]
        if taskConfigDescription == "Skipable":
            continue

        # Create the new task config.
        taskConfigPK = uuid.uuid4().__str__()
        sql = """INSERT INTO TasksConfigs SET pk='%s', 
        taskType='5e70152a-9c5b-4c17-b823-c9298c546eeb',
        taskTypePKReference='%s',
        description='%s'""" % (taskConfigPK, cr, taskConfigDescription)
        databaseInterface.runSQL(sql)

        # Create the new link, pointing at the classification's default next
        # link when one is configured.
        linkPK = uuid.uuid4().__str__()
        sql = """SELECT MicroserviceChainLink FROM DefaultCommandsForClassifications WHERE forClassification = '%s';""" % (cc)
        rows2 = databaseInterface.queryAllSQL(sql)
        if not rows2:
            linkDefaultNextLink = "NULL"
        else:
            linkDefaultNextLink = "'%s'" % (rows2[0][0])

        sql = """INSERT INTO MicroServiceChainLinks SET pk = '%s',
        currentTask='%s',
        defaultNextChainLink = %s,
        microserviceGroup='Normalize';""" % (linkPK, taskConfigPK, linkDefaultNextLink)
        databaseInterface.runSQL(sql)

        # Create the exit code.
        exitCodesPK = uuid.uuid4().__str__()
        sql = """INSERT INTO MicroServiceChainLinksExitCodes SET pk = '%s',
        microServiceChainLink = '%s',
        exitCode = 0,
        nextMicroServiceChainLink = NULL;""" % (exitCodesPK, linkPK)
        databaseInterface.runSQL(sql)
def archivematicaCreateMETSRightsDspaceMDRef(fileUUID, filePath, transferUUID, itemdirectoryPath):
    ret = []
    try:
        print fileUUID, filePath
        # find the mets file
        sql = (
            "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation = '%%SIPDirectory%%%s/mets.xml' AND transferUUID = '%s';"
            % (os.path.dirname(filePath), transferUUID)
        )
        rows = databaseInterface.queryAllSQL(sql)
        for row in rows:
            metsFileUUID = row[0]
            metsLoc = row[1].replace("%SIPDirectory%", "", 1)
            metsLocation = os.path.join(os.path.dirname(itemdirectoryPath), "mets.xml")
            LABEL = "mets.xml-%s" % (metsFileUUID)
            ret.append(createMDRefDMDSec(LABEL, metsLocation, metsLoc))

        base = os.path.dirname(os.path.dirname(itemdirectoryPath))
        base2 = os.path.dirname(os.path.dirname(filePath))

        for dir in os.listdir(base):
            fullDir = os.path.join(base, dir)
            fullDir2 = os.path.join(base2, dir)
            print fullDir
            if dir.startswith("ITEM"):
                print "continue"
                continue
            if not os.path.isdir(fullDir):
                continue
            sql = (
                "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation = '%%SIPDirectory%%%s/mets.xml' AND transferUUID = '%s';"
                % (fullDir2, transferUUID)
            )
            print sql
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                print row
                metsFileUUID = row[0]
                metsLoc = row[1].replace("%SIPDirectory%", "", 1)
                metsLocation = os.path.join(fullDir, "mets.xml")
                print metsLocation
                LABEL = "mets.xml-%s" % (metsFileUUID)
                ret.append(createMDRefDMDSec(LABEL, metsLocation, metsLoc))

    except Exception as inst:
        print >> sys.stderr, "Error creating mets dspace mdref", fileUUID, filePath
        print >> sys.stderr, type(inst), inst.args
        sharedVariablesAcrossModules.globalErrorCount += 1

    return ret
def getrightsGranted(pk, parent):
    """Append one <rightsGranted> element to parent per matching row.

    pk -- numeric key of the rights statement (formatted with %d).
    parent -- etree element that receives the new children.

    Each element receives the act, one <restriction> per restriction row, an
    optional term element and any notes. The term element is termOfGrant for
    an "allow" restriction and termOfRestriction for "disallow" or
    "conditional", judged on the last restriction read ("Undefined" when
    there is none). When dates are present but the restriction is none of
    those, the error is reported and counted, and the notes for that row are
    not emitted.
    """
    grantedQuery = (
        "SELECT RightsStatementRightsGranted.pk, act, startDate, endDate, endDateOpen FROM RightsStatementRightsGranted  WHERE fkRightsStatement = %d"
        % (pk)
    )
    for grantedPK, act, startDate, endDate, endDateOpen in databaseInterface.queryAllSQL(grantedQuery):
        rightsGranted = etree.SubElement(parent, "rightsGranted")
        etree.SubElement(rightsGranted, "act").text = act

        restriction = "Undefined"
        restrictionQuery = (
            """SELECT restriction FROM RightsStatementRightsGrantedRestriction WHERE RightsStatementRightsGrantedRestriction.fkRightsStatementRightsGranted = %s """
            % (grantedPK)
        )
        for restrictionRow in databaseInterface.queryAllSQL(restrictionQuery):
            restriction = restrictionRow[0]
            if restriction.lower() not in ("disallow", "conditional", "allow"):
                print >>sys.stderr, "The value of element restriction must be: 'Allow', 'Disallow', or 'Conditional':", restriction
                sharedVariablesAcrossModules.globalErrorCount += 1
            # The element is written even when the value is invalid.
            etree.SubElement(rightsGranted, "restriction").text = restriction

        if startDate or endDate or endDateOpen:
            if restriction.lower() == "allow":
                termTag = "termOfGrant"
            elif restriction.lower() in ("disallow", "conditional"):
                termTag = "termOfRestriction"
            else:
                print >>sys.stderr, "The value of element restriction must be: 'Allow', 'Dissallow', or 'Conditional'"
                sharedVariablesAcrossModules.globalErrorCount += 1
                continue
            term = etree.SubElement(rightsGranted, termTag)

            if startDate:
                etree.SubElement(term, "startDate").text = formatDate(startDate)
            # The open-ended flag takes precedence over a concrete end date.
            if endDateOpen:
                etree.SubElement(term, "endDate").text = "OPEN"
            elif endDate:
                etree.SubElement(term, "endDate").text = formatDate(endDate)

        # 4.1.6.4 rightsGrantedNote (O, R)
        noteQuery = (
            "SELECT rightsGrantedNote FROM RightsStatementRightsGrantedNote WHERE fkRightsStatementRightsGranted = %d;"
            % (grantedPK)
        )
        for noteRow in databaseInterface.queryAllSQL(noteQuery):
            etree.SubElement(rightsGranted, "rightsGrantedNote").text = noteRow[0]
 def alreadyNormalizedManually(self, unit, ComandClassification):
     try:
         SIPUUID = unit.owningUnit.UUID
         fileUUID = unit.UUID
         SIPPath = unit.owningUnit.currentPath
         filePath = unit.currentPath
         bname = os.path.basename(filePath)
         dirName = os.path.dirname(filePath)
         i = bname.rfind(".")
         if i != -1:
             bname = bname[:i]
         path = os.path.join(dirName, bname)
         if ComandClassification == "preservation":
             path = path.replace("%SIPDirectory%objects/", "%SIPDirectory%objects/manualNormalization/preservation/")
         elif ComandClassification == "access":
             path = path.replace("%SIPDirectory%objects/", "%SIPDirectory%objects/manualNormalization/access/")
         else:
             return False
         sql = """SELECT fileUUID FROM Files WHERE sipUUID = '%s' AND currentLocation LIKE '%s%%' AND removedTime = 0;""" % (SIPUUID, path.replace("%", "\%"))
         ret = bool(databaseInterface.queryAllSQL(sql))
         return ret 
     except Exception as inst:
         print "DEBUG EXCEPTION!"
         traceback.print_exc(file=sys.stdout)
         print type(inst)     # the exception instance
         print inst.args
Ejemplo n.º 11
0
 def getmicroServiceChainLink(self, variable, variableValue, defaultMicroServiceChainLink):
     """Return the chain link stored for this unit's variable, or the default.

     variable -- name of the unit variable to look up for this unit's
                 unitType and UUID.
     variableValue -- accepted but not used by the lookup.
     defaultMicroServiceChainLink -- returned when no row matches.
     """
     rows = databaseInterface.queryAllSQL(
         """SELECT pk, microServiceChainLink  FROM UnitVariables WHERE unitType = '%s' AND unitUUID = '%s' AND variable = '%s';"""
         % (self.unitType, self.UUID, variable)
     )
     if not len(rows):
         return defaultMicroServiceChainLink
     # Only the first matching row is considered.
     return rows[0][1]
def getFileUUIDofSourceFile(transferUUID, sourceFilePath):
    """Return the UUID of a non-removed transfer file whose location starts with sourceFilePath.

    Returns the fileUUID of the first matching row, or "" when none matches.
    """
    # Escape '%' so it is matched literally by LIKE.
    likePrefix = sourceFilePath.replace('%', '\\%')
    sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND transferUUID = '%s' AND currentLocation LIKE '%s%%';""" % (transferUUID, likePrefix)
    matches = databaseInterface.queryAllSQL(sql)
    if not len(matches):
        return ""
    return matches[0][0]
Ejemplo n.º 13
0
def findExistingFileID(ext):
    """Look up the FileIDs row described as 'A <ext> file'.

    Returns the first matching row (pk, validPreservationFormat,
    validAccessFormat), or "" when there is no match.
    """
    sql = """SELECT pk, validPreservationFormat, validAccessFormat FROM FileIDs where fileIDType = '16ae42ff-1018-4815-aac8-cceacd8d88a8' AND description = '%s';""" % ('A %s file' % (ext))
    matches = databaseInterface.queryAllSQL(sql)
    if len(matches):
        return matches[0]
    return ""
Ejemplo n.º 14
0
def xmlCreateFileAssociationBetween(originalFileFullPath, outputFromNormalizationFileFullPath, SIPFullPath, sipUUID, eventDetailText, eventOutcomeDetailNote, outputFileUUID=""):
    #assign file UUID

    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = uuid.uuid4().__str__()

    originalFilePathRelativeToSIP = originalFileFullPath.replace(SIPFullPath,"%SIPDirectory%", 1)
    sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(originalFilePathRelativeToSIP) + "' AND Files.sipUUID = '" + sipUUID + "';"
    print sql
    rows = databaseInterface.queryAllSQL(sql)
    print rows
    fileUUID = rows[0][0]


    filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace(SIPFullPath,"%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP, outputFileUUID, sipUUID, uuid.uuid4().__str__(), date, sourceType="creation", use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath, date, uuid.uuid4().__str__())

    taskUUID = uuid.uuid4().__str__()
    insertIntoEvents(fileUUID=fileUUID, \
               eventIdentifierUUID=taskUUID, \
               eventType="normalization", \
               eventDateTime=date, \
               eventDetail=eventDetailText, \
               eventOutcome="", \
               eventOutcomeDetailNote=eventOutcomeDetailNote)

    insertIntoDerivations(sourceFileUUID=fileUUID, derivedFileUUID=outputFileUUID, relatedEventUUID=taskUUID)
Ejemplo n.º 15
0
def getMaxLastUpdate():
    """Return the stored FPR client 'maxLastUpdate' value.

    Falls back to "2000-01-01T00:00:00" when no such unit variable exists.
    """
    rows = databaseInterface.queryAllSQL("""SELECT variableValue FROM UnitVariables WHERE unitType = 'FPR' AND unitUUID = 'Client' AND variable = 'maxLastUpdate' """)
    if not rows:
        return "2000-01-01T00:00:00"
    return rows[0][0]
Ejemplo n.º 16
0
def bridgeUserSelections():
    """Draw a green arrow from each choice link to the starting link of every chain it offers.

    Rows in which either end is empty are skipped.
    """
    choiceRows = databaseInterface.queryAllSQL("SELECT MicroServiceChainChoice.choiceAvailableAtLink, MicroServiceChains.startingLink FROM MicroServiceChainChoice JOIN MicroServiceChains ON MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk;")
    for choiceAvailableAtLink, startingLink in choiceRows:
        if not (choiceAvailableAtLink and startingLink):
            continue
        addArrow(choiceAvailableAtLink, startingLink, color='green')
Ejemplo n.º 17
0
def getDashboardUUID():
    """Return the 'dashboard_uuid' value from DashboardSettings.

    Returns None unless exactly one row matches.
    """
    sql = "SELECT value FROM DashboardSettings WHERE name='%s'" % (MySQLdb.escape_string('dashboard_uuid'))
    settings = databaseInterface.queryAllSQL(sql)
    if len(settings) != 1:
        return None
    return settings[0][0]
def getDocumentationIdentifier(pk, parent):
    """Append one <copyrightDocumentationIdentifier> element to parent per matching row.

    pk -- numeric key of the copyright information record (formatted with %d).
    parent -- etree element that receives the new children.
    """
    sql = "SELECT pk, copyrightDocumentationIdentifierType, copyrightDocumentationIdentifierValue, copyrightDocumentationIdentifierRole FROM RightsStatementCopyrightDocumentationIdentifier WHERE fkRightsStatementCopyrightInformation = %d" % (pk)
    for _, identifierType, identifierValue, identifierRole in databaseInterface.queryAllSQL(sql):
        identifier = etree.SubElement(parent, "copyrightDocumentationIdentifier")
        etree.SubElement(identifier, "copyrightDocumentationIdentifierType").text = identifierType
        etree.SubElement(identifier, "copyrightDocumentationIdentifierValue").text = identifierValue
        etree.SubElement(identifier, "copyrightDocumentationRole").text = identifierRole
Ejemplo n.º 19
0
def jobChainTextGet(leadIn, pk, indent=""):
    """Write the entry for chain pk and continue with the chain's starting link.

    leadIn -- text placed before the quoted chain label in the written line.
    pk -- key of the MicroServiceChains row.
    indent -- forwarded to jobChainLinkTextGet().
    """
    sql = """SELECT startingLink, description FROM MicroServiceChains WHERE pk = '%s';""" % (pk.__str__())
    for startingLink, description in databaseInterface.queryAllSQL(sql):
        chainLabel = description + " MicroServiceChain"
        writePlant("%s \"%s\"" % (leadIn, chainLabel))
        jobChainLinkTextGet(indent, "-->[" + chainLabel + "]", startingLink)
def getTrimAmdSec(baseDirectoryPath, fileGroupIdentifier):
    """Build a <digiprovMD> element referencing the SIP's TRIM container metadata files.

    baseDirectoryPath -- accepted but not used here.
    fileGroupIdentifier -- SIP UUID whose non-removed files with fileGrpUse
        'TRIM container metadata' are referenced, one <mdRef> each.
    """
    digiprovMD = etree.Element("digiprovMD")

    sql = "SELECT currentLocation FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse='TRIM container metadata';" % ('sipUUID', fileGroupIdentifier)
    for (currentLocation,) in databaseInterface.queryAllSQL(sql):
        # The reference is relative to the SIP directory.
        href = currentLocation.replace("%SIPDirectory%", "", 1)
        attrib = {
            "LABEL": "ContainerMetadata.xml",
            xlinkBNS + "href": href,
            "MDTYPE": "OTHER",
            "OTHERMDTYPE": "CUSTOM",
            'LOCTYPE': "OTHER",
            'OTHERLOCTYPE': "SYSTEM",
        }
        etree.SubElement(digiprovMD, "mdRef", attrib=attrib)
    return digiprovMD
Ejemplo n.º 21
0
def getFileUUIDLike(filePath, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith):
    """Map currentLocation to fileUUID for non-removed files matching a LIKE pattern.

    filePath -- path (it may hold LIKE wildcards) in which unitPath is
        replaced by unitPathReplaceWith to form the pattern.
    unitIdentifierType -- name of the Files column compared to unitIdentifier.

    Returns a dict keyed by currentLocation.
    """
    pattern = filePath.replace(unitPath, unitPathReplaceWith)
    sql = "SELECT Files.fileUUID, Files.currentLocation FROM Files WHERE removedTime = 0 AND Files.currentLocation LIKE '" + MySQLdb.escape_string(pattern) + "' AND " + unitIdentifierType + " = '" + unitIdentifier + "';"
    return dict(
        (currentLocation, foundUUID)
        for foundUUID, currentLocation in databaseInterface.queryAllSQL(sql)
    )
Ejemplo n.º 22
0
def bridgeLoadVariable():
    ""
    sql = "SELECT MicroServiceChainLinks.pk, TasksConfigsUnitVariableLinkPull.variable, TasksConfigsUnitVariableLinkPull.defaultMicroServiceChainLink FROM MicroServiceChainLinks JOIN TasksConfigs ON MicroServiceChainLinks.currentTask = TasksConfigs.pk JOIN TasksConfigsUnitVariableLinkPull ON TasksConfigsUnitVariableLinkPull.pk = TasksConfigs.taskTypePKReference WHERE TasksConfigs.taskType = 'c42184a3-1a7f-4c4d-b380-15d8d97fdd11';"
    rows = databaseInterface.queryAllSQL(sql)
    for row in rows:
        count = 0
        microServiceChainLink, variable, defaultMicroServiceChainLink = row
        sql = "SELECT MicroServiceChainLinks.pk, TasksConfigsSetUnitVariable.variable, TasksConfigsSetUnitVariable.microServiceChainLink  FROM MicroServiceChainLinks JOIN TasksConfigs ON MicroServiceChainLinks.currentTask = TasksConfigs.pk JOIN TasksConfigsSetUnitVariable ON TasksConfigsSetUnitVariable.pk = TasksConfigs.taskTypePKReference WHERE TasksConfigs.taskType = '6f0b612c-867f-4dfd-8e43-5b35b7f882d7' AND TasksConfigsSetUnitVariable.variable = '%s';" % (variable)
        rows2 = databaseInterface.queryAllSQL(sql)
        for row2 in rows2:
            microServiceChainLink2, variable,  microServiceChainLinkDest = row2
            addArrow(microServiceChainLink, microServiceChainLinkDest, color="brown")
            count +=1
        if defaultMicroServiceChainLink:
            addArrow(microServiceChainLink, defaultMicroServiceChainLink, color="brown")
        if count == 0:
            print "no bridge variable set for: ", linkUUIDtoNodeName[microServiceChainLink]           
    return
    def __init__(self, jobChainLink, pk, unit):
        """Create and run the transcoder task(s) for one command relationship.

        jobChainLink -- owning chain link; its passVar (when it is a
            replacementDic) is applied to the option values, and its
            linkProcessingComplete() is called when there is nothing to run.
        pk -- key of the CommandRelationships row to execute.
        unit -- unit being processed; supplies the replacement dictionaries
            and currentPath.

        One taskStandard is created, registered in self.tasks and logged per
        matching row, and the tasks are performed after the tasks lock has
        been released. The lock is held through a with-block so it is released
        even if creating a task raises.
        """
        global outputLock
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False

        opts = {"inputFile":"%relativeLocation%", "fileUUID":"%fileUUID%", 'commandClassifications':'%commandClassifications%', "taskUUID":"%taskUUID%", "objectsDirectory":"%SIPObjectsDirectory%", "logsDirectory":"%SIPLogsDirectory%", "sipUUID":"%SIPUUID%", "sipPath":"%SIPDirectory%", "fileGrpUse":"%fileGrpUse%", "normalizeFileGrpUse":"%normalizeFileGrpUse%", "excludeDirectory":"%excludeDirectory%", "standardErrorFile":"%standardErrorFile%", "standardOutputFile":"%standardOutputFile%"}

        # Resolve the placeholders in every option value: first through the
        # passed-in variables, then the unit's replacements, then the
        # replacements computed for the unit's current path.
        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
        for optsKey in opts.keys():
            passVar = self.jobChainLink.passVar
            if passVar is not None and isinstance(passVar, replacementDic):
                opts[optsKey] = passVar.replace(opts[optsKey])[0]

            # Fetched per option, as before; whether its values can vary
            # between calls cannot be told from here.
            commandReplacementDic = unit.getReplacementDic()
            for key, value in commandReplacementDic.iteritems():
                opts[optsKey] = opts[optsKey].replace(key, value)

            for key, value in SIPReplacementDic.iteritems():
                opts[optsKey] = opts[optsKey].replace(key, value)

        commandReplacementDic = unit.getReplacementDic()
        sql = """SELECT CommandRelationships.pk FROM CommandRelationships JOIN Commands ON CommandRelationships.command = Commands.pk WHERE CommandRelationships.pk = '%s';""" % (pk.__str__())
        rows = databaseInterface.queryAllSQL(sql)
        tasksList = []
        if rows:
            with self.tasksLock:
                for row in rows:
                    UUID = uuid.uuid4().__str__()
                    opts["taskUUID"] = UUID
                    opts["CommandRelationship"] = pk.__str__()
                    execute = "transcoder_cr%s" % (pk)
                    # NOTE(review): the return value is discarded, as in the
                    # original; confirm whether deUnicode works in place.
                    deUnicode(execute)
                    arguments = row.__str__()
                    standardOutputFile = opts["standardOutputFile"]
                    standardErrorFile = opts["standardErrorFile"]
                    self.standardOutputFile = standardOutputFile
                    self.standardErrorFile = standardErrorFile
                    self.execute = execute
                    self.arguments = arguments
                    task = taskStandard(self, execute, opts, standardOutputFile, standardErrorFile, outputLock=outputLock, UUID=UUID)
                    self.tasks[UUID] = task
                    databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic, UUID, arguments)
                    tasksList.append(task)

            # Run the tasks only after the lock has been released.
            for task in tasksList:
                task.performTask()

        else:
            self.jobChainLink.linkProcessingComplete(self.exitCode)
def setMaildirFiles(sipUUID, sipPath):
    """Tag every file under the SIP's objects/Maildir tree as a maildir email file.

    Walks <sipPath>/objects/Maildir, looks each file up in the Files table by
    its %SIPDirectory%-relative location and, when a row is found, inserts a
    FilesIdentifiedIDs row that points at the enabled FileIDs entry described
    as 'A maildir email file'.

    sipUUID -- value matched against Files.sipUUID
    sipPath -- SIP root; its first occurrence in each path becomes %SIPDirectory%
    """
    maildirRoot = os.path.join(sipPath, "objects", "Maildir")
    for dirPath, _subDirs, fileNames in os.walk(maildirRoot):
        for fileName in fileNames:
            absolutePath = os.path.join(dirPath, fileName)
            fileRelativePath = absolutePath.replace(sipPath, "%SIPDirectory%", 1)
            lookupSQL = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND sipUUID = '%s' AND currentLocation = '%s';""" % (sipUUID, fileRelativePath)
            matches = databaseInterface.queryAllSQL(lookupSQL)
            if len(matches) == 0:
                # File is not registered for this SIP; nothing to tag.
                continue
            # Only the first matching row is used.
            insertSQL = """INSERT INTO FilesIdentifiedIDs (fileUUID, fileID) VALUES ('%s', (SELECT pk FROM FileIDs WHERE enabled = TRUE AND description = 'A maildir email file')); """ % (matches[0][0])
            databaseInterface.runSQL(insertSQL)
def getTrimDmdSec(baseDirectoryPath, fileGroupIdentifier):
    """Build a Dublin Core dmdSec element from TRIM container metadata.

    The title, provenance, isPartOf and identifier terms come from
    objects/ContainerMetadata.xml under baseDirectoryPath. The extent term is
    the number of 'original' files recorded for the SIP, and the date term is
    "<lowest>/<highest>" of the Document/DateModified values found in the
    SIP's 'TRIM file metadata' files that exist on disk (compared as plain
    values, "None/None" when none were found).

    baseDirectoryPath -- SIP directory; substituted for %SIPDirectory%
    fileGroupIdentifier -- value matched against Files.sipUUID
    Returns the dmdSec element.
    """
    dmdSec = etree.Element("dmdSec")
    mdWrap = etree.SubElement(dmdSec, "mdWrap")
    mdWrap.set("MDTYPE", "DC")
    xmlData = etree.SubElement(mdWrap, "xmlData")

    dublincore = etree.SubElement(xmlData, "dublincore", attrib=None, nsmap={None:dctermsNS})
    dublincore.set(xsiBNS+"schemaLocation", dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")

    def addTerm(termName, text):
        # Append one dcterms child to the dublincore element.
        etree.SubElement(dublincore, dctermsBNS + termName).text = text

    containerPath = os.path.join(baseDirectoryPath, "objects", "ContainerMetadata.xml")
    container = etree.parse(containerPath).getroot()

    addTerm("title", container.find("Container/TitleFreeTextPart").text)
    addTerm("provenance", "Department: %s; OPR: %s" % (container.find("Container/Department").text, container.find("Container/OPR").text))
    addTerm("isPartOf", container.find("Container/FullClassificationNumber").text)
    # Only the part after the last '/' of the record number is kept.
    addTerm("identifier", container.find("Container/RecordNumber").text.split('/')[-1])

    # Extent: number of original objects recorded for this SIP.
    sql = "SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse='original';" % ('sipUUID', fileGroupIdentifier)
    originals = databaseInterface.queryAllSQL(sql)
    addTerm("extent", "%d digital objects" % (len(originals)))

    sql = "SELECT currentLocation FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse='TRIM file metadata';" % ('sipUUID', fileGroupIdentifier)
    metadataFiles = databaseInterface.queryAllSQL(sql)

    earliest = None
    latest = None
    for metadataRow in metadataFiles:
        metadataPath = metadataRow[0].replace('%SIPDirectory%', baseDirectoryPath, 1)
        if not os.path.isfile(metadataPath):
            continue
        modified = etree.parse(metadataPath).getroot().find("Document/DateModified").text
        if earliest is None or modified < earliest:
            earliest = modified
        if latest is None or modified > latest:
            latest = modified

    addTerm("date", "%s/%s" % (earliest, latest))
    return dmdSec
Ejemplo n.º 26
0
def bridgeExitCodes():
    """Add an arrow for every exit-code row that names a next chain link.

    Rows of MicroServiceChainLinksExitCodes whose nextMicroServiceChainLink is
    empty are skipped.
    """
    sql = """SELECT microServiceChainLink, nextMicroServiceChainLink FROM MicroServiceChainLinksExitCodes;"""
    for sourceLink, targetLink in databaseInterface.queryAllSQL(sql):
        if not targetLink:
            continue
        addArrow(sourceLink, targetLink)
    return
Ejemplo n.º 27
0
def printFidoInsert(itemdirectoryPath):
    global runSQLInserts
    ext = findExtension(itemdirectoryPath).lower()
    if not ext:
        return

    fileID = findExistingFileID(ext)
    if not fileID:
        return
    fileID, validPreservationFormat, validAccessFormat = fileID

    FidoFileID = getFidoID(itemdirectoryPath).strip()
    if not FidoFileID:
        return

    # check for existing rule
    sql = (
        """SELECT pk FROM FileIDs WHERE fileIDType = 'afdbee13-eec5-4182-8c6c-f5638ee290f3' AND description = '%s';"""
        % FidoFileID
    )
    if databaseInterface.queryAllSQL(sql):
        a = "skip"
        # return
    if FidoFileID in idsDone:
        return

    fileIDUUID = uuid.uuid4().__str__()

    sql = """INSERT INTO FileIDs (pk, description, validPreservationFormat, validAccessFormat, fileIDType) 
        VALUES ('%s', '%s', %s, %s, 'afdbee13-eec5-4182-8c6c-f5638ee290f3');""" % (
        fileIDUUID,
        FidoFileID,
        validPreservationFormat,
        validAccessFormat,
    )
    idsDone.append(FidoFileID)
    print sql
    if runSQLInserts:
        databaseInterface.runSQL(sql)

    FileIDsBySingleIDUUID = uuid.uuid4().__str__()
    sql = """INSERT INTO FileIDsBySingleID  (pk, fileID, id, tool, toolVersion)
        VALUES ('%s', '%s', '%s', 'Fido', '1.1.2');""" % (
        FileIDsBySingleIDUUID,
        fileIDUUID,
        FidoFileID,
    )
    print sql

    if runSQLInserts:
        databaseInterface.runSQL(sql)

    printNewCommandRelationships(fileID, fileIDUUID)

    print
Ejemplo n.º 28
0
def setMaxLastUpdate(maxLastUpdate):
    """Store maxLastUpdate as the FPR/Client 'maxLastUpdate' unit variable.

    Updates the existing UnitVariables row when the lookup finds one,
    otherwise inserts a row with a freshly generated UUID as pk.
    Returns maxLastUpdate unchanged.
    """
    lookup = """SELECT pk FROM UnitVariables WHERE unitType = 'FPR' AND unitUUID = 'Client' AND variable = 'maxLastUpdate'; """
    if databaseInterface.queryAllSQL(lookup):
        statement = """UPDATE UnitVariables SET variableValue='%s' WHERE unitType = 'FPR' AND unitUUID = 'Client' AND variable = 'maxLastUpdate';""" % (maxLastUpdate)
    else:
        newPK = str(uuid.uuid4())
        statement = """INSERT INTO UnitVariables SET pk='%s', variableValue='%s', unitType='FPR', unitUUID = 'Client', variable = 'maxLastUpdate';""" % (newPK, maxLastUpdate)
    databaseInterface.runSQL(statement)
    return maxLastUpdate
 def __init__(self, commandLinker, replacementDic, opts, onSuccess):
     """Resolve a CommandRelationships pk to its command and wrap it in a Command.

     commandLinker -- CommandRelationships pk; kept as self.pk
     replacementDic, opts, onSuccess -- stored and passed on to Command
     """
     self.pk = commandLinker
     self.replacementDic = replacementDic
     self.opts = opts
     self.onSuccess = onSuccess

     lookup = "SELECT command FROM CommandRelationships where pk = %s;" % (str(self.pk))
     matches = databaseInterface.queryAllSQL(lookup)
     if matches:
         # When several rows come back the last one wins.
         self.command = matches[-1][0]
     # NOTE(review): self.command is not assigned when the query returns no
     # rows, so the line below raises AttributeError in that case.
     self.commandObject = Command(str(self.command), replacementDic, self.onSuccess, opts)
Ejemplo n.º 30
0
def getArchivematicaFileID(FidoFileID, FidoVersion):
    """Return the fileID stored in FileIDsBySingleID for a Fido id and version.

    When no row matches, a message is written to stderr and the process is
    ended through exit(0).
    """
    sql = """SELECT fileID FROM FileIDsBySingleID 
            WHERE 
                tool = 'Fido' 
                AND toolVersion = '%s'
                AND id='%s';""" % (FidoVersion, FidoFileID)
    matches = databaseInterface.queryAllSQL(sql)
    if len(matches):
        # First column of the first matching row.
        return matches[0][0]
    print >>sys.stderr, "No Archivematica format id for Fido %s: %s" % (FidoVersion, FidoFileID)
    exit(0)
Ejemplo n.º 31
0
def bridgeMagicChainLinks():
    ""
    #find the assignments
    sql = "SELECT MicroServiceChainLinks.pk, TasksConfigsAssignMagicLink.execute FROM MicroServiceChainLinks JOIN TasksConfigs ON MicroServiceChainLinks.currentTask = TasksConfigs.pk JOIN TasksConfigsAssignMagicLink ON TasksConfigsAssignMagicLink.pk = TasksConfigs.taskTypePKReference WHERE TasksConfigs.taskType = '3590f73d-5eb0-44a0-91a6-5b2db6655889';"
    rows = databaseInterface.queryAllSQL(sql)
    
    for row in rows:
        microServiceChainLink, magicLink = row
        node = G.get_node(linkUUIDtoNodeName[microServiceChainLink])
    
        visitedNodes = {node:None} #prevents looping    
        count = bridgeMagicChainLinksRecursiveAssist(node, magicLink, visitedNodes)
        if count == 0:
            print "no loads of set link: ", node    
    return
Ejemplo n.º 32
0
def xmlCreateFileAssociationBetween(originalFileFullPath,
                                    outputFromNormalizationFileFullPath,
                                    SIPFullPath,
                                    sipUUID,
                                    eventDetailText,
                                    eventOutcomeDetailNote,
                                    outputFileUUID=""):
    #assign file UUID

    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = uuid.uuid4().__str__()

    originalFilePathRelativeToSIP = originalFileFullPath.replace(
        SIPFullPath, "%SIPDirectory%", 1)
    sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(
        originalFilePathRelativeToSIP
    ) + "' AND Files.sipUUID = '" + sipUUID + "';"
    print sql
    rows = databaseInterface.queryAllSQL(sql)
    print rows
    fileUUID = rows[0][0]

    filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace(
        SIPFullPath, "%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP,
                 outputFileUUID,
                 sipUUID,
                 uuid.uuid4().__str__(),
                 date,
                 sourceType="creation",
                 use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath,
                          date,
                          uuid.uuid4().__str__())

    taskUUID = uuid.uuid4().__str__()
    insertIntoEvents(fileUUID=fileUUID, \
               eventIdentifierUUID=taskUUID, \
               eventType="normalization", \
               eventDateTime=date, \
               eventDetail=eventDetailText, \
               eventOutcome="", \
               eventOutcomeDetailNote=eventOutcomeDetailNote)

    insertIntoDerivations(sourceFileUUID=fileUUID,
                          derivedFileUUID=outputFileUUID,
                          relatedEventUUID=taskUUID)
Ejemplo n.º 33
0
def updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith):
    """Dest needs to be the actual full destination path with filename."""
    srcDB = src.replace(unitPath, unitPathReplaceWith)
    dstDB = dst.replace(unitPath, unitPathReplaceWith)
    sql = "SELECT Files.fileUUID, Files.currentLocation FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(srcDB) + "' AND " + unitIdentifierType + " = '" + unitIdentifier + "';"
    rows = databaseInterface.queryAllSQL(sql)
    if len(rows) != 1:
        print sys.stderr, len(rows), "rows", sql, rows
        exit(4)
    for row in rows:
        fileUUID = row[0]
        location = row[1]
        sql =  """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(dstDB), fileUUID)
        databaseInterface.runSQL(sql)
    print "moving: ", src, dst
    shutil.move(src, dst)
def connect_and_change_transfer_file_status(uuid, status):
    """Set the status of the indexed documents for a transfer's files.

    uuid -- transfer UUID matched against Files.transferUUID
    status -- value written to each document's 'status' field

    Returns the number of Files rows found for the transfer, which is not
    necessarily the number of documents updated.
    """
    # get file UUIDs for each file in the transfer
    sql = "SELECT fileUUID from Files WHERE transferUUID='" + MySQLdb.escape_string(uuid) + "'"
    rows = databaseInterface.queryAllSQL(sql)
    fileCount = len(rows)

    if fileCount > 0:
        conn = connect_and_create_index('transfers')

        # cycle through file UUIDs and update status
        for row in rows:
            query = pyes.FieldQuery(pyes.FieldParameter('fileuuid', row[0]))
            documents = conn.search_raw(query=query)
            if len(documents['hits']['hits']) > 0:
                # Only the first hit for a file UUID is updated.
                firstHitId = documents['hits']['hits'][0]['_id']
                conn.update({'status': status}, 'transfers', 'transferfile', firstHitId)
    return fileCount
Ejemplo n.º 35
0
def addAccessionEvent(fileUUID, transferUUID, date):
    """Record a "registration" event carrying the transfer's accession ID.

    Nothing is recorded when the transfer's accessionID is empty.
    Raises IndexError when no Transfers row matches transferUUID.
    """
    sql = """SELECT accessionID FROM Transfers WHERE transferUUID = '%s';""" % (
        transferUUID)
    accessionID = databaseInterface.queryAllSQL(sql)[0][0]
    if not accessionID:
        return

    eventIdentifierUUID = str(uuid.uuid4())
    note = "accession#" + MySQLdb.escape_string(accessionID)
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=eventIdentifierUUID,
                     eventType="registration",
                     eventDateTime=date,
                     eventDetail="",
                     eventOutcome="",
                     eventOutcomeDetailNote=note)
Ejemplo n.º 36
0
def connect_and_remove_sip_transfer_files(uuid):
    """Delete the 'transfers' index documents belonging to a SIP's files.

    uuid -- SIP UUID matched against Files.sipUUID
    """
    # get file UUIDs for each file in the SIP
    sql = "SELECT fileUUID from Files WHERE sipUUID='" + MySQLdb.escape_string(uuid) + "'"
    rows = databaseInterface.queryAllSQL(sql)

    if len(rows) > 0:
        conn = connect_and_create_index('transfers')

        # cycle through file UUIDs and delete files from transfer backlog
        for row in rows:
            query = pyes.FieldQuery(pyes.FieldParameter('fileuuid', row[0]))
            documents = conn.search_raw(query=query)
            if len(documents['hits']['hits']) > 0:
                # Only the first hit for a file UUID is deleted.
                firstHitId = documents['hits']['hits'][0]['_id']
                conn.delete('transfers', 'transferfile', firstHitId)
Ejemplo n.º 37
0
def bridgeMagicChainLinksRecursiveAssist(node, magicLink, visitedNodes):
    """Walk the graph from node and connect matching links to magicLink.

    A node whose link has a task of type 6fe259c2-459d-4d4b-81a4-1b9daf7ee2e9
    gets a brown arrow to magicLink and ends that branch of the walk.
    Otherwise the unvisited neighbours are searched recursively.

    node -- graph node name; the link pk is taken from the text between the
        first character and the first '}'
    visitedNodes -- dict of nodes already seen, updated in place
    Returns the number of arrows added from this node onwards.
    """
    link = node[1:node.find('}')]
    sql = "SELECT MicroServiceChainLinks.pk FROM MicroServiceChainLinks JOIN TasksConfigs ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '6fe259c2-459d-4d4b-81a4-1b9daf7ee2e9' AND MicroServiceChainLinks.pk = '%s';" % (link)

    # if it's loading it, set the load and return
    if len(databaseInterface.queryAllSQL(sql)):
        addArrow(link, magicLink, color="brown")
        return 1

    found = 0
    for neighbour in G.neighbors_iter(node):
        if neighbour in visitedNodes:
            continue
        # Mark before descending so cycles cannot recurse forever.
        visitedNodes[neighbour] = None
        found += bridgeMagicChainLinksRecursiveAssist(neighbour, magicLink, visitedNodes)
    return found
Ejemplo n.º 38
0
def createWatchedDirectories():
    """Write one PlantUML diagram per row of WatchedDirectories.

    The diagram name is built from the watched directory path with
    %watchDirectoryPath% removed and '/' replaced by '_'. The diagram body
    comes from jobChainTextGet for the row's chain.
    """
    global processedJobChainLinks
    sql = """SELECT watchedDirectoryPath, chain, expectedType FROM WatchedDirectories;"""
    for watchedDirectoryPath, chain, _expectedType in databaseInterface.queryAllSQL(sql):
        diagramSuffix = watchedDirectoryPath.replace("%watchDirectoryPath%", "")
        diagramSuffix = diagramSuffix.replace("/", "_") + ".png"
        writePlant("@startuml WatchedDirectory-", diagramSuffix)
        writePlant("title " + watchedDirectoryPath)
        jobChainTextGet("(*) --> [" + watchedDirectoryPath + "]", chain)
        writePlant("@enduml")
Ejemplo n.º 39
0
def printFidoInsert(itemdirectoryPath):
    global runSQLInserts
    ext = findExtension(itemdirectoryPath).lower()
    if not ext:
        return

    fileID = findExistingFileID(ext)
    if not fileID:
        return
    fileID, validPreservationFormat, validAccessFormat = fileID

    FidoFileID = getFidoID(itemdirectoryPath).strip()
    if not FidoFileID:
        return

    #check for existing rule
    sql = """SELECT pk FROM FileIDs WHERE fileIDType = 'afdbee13-eec5-4182-8c6c-f5638ee290f3' AND description = '%s';""" % FidoFileID
    if databaseInterface.queryAllSQL(sql):
        a = "skip"
        #return
    if FidoFileID in idsDone:
        return

    fileIDUUID = uuid.uuid4().__str__()

    sql = """INSERT INTO FileIDs (pk, description, validPreservationFormat, validAccessFormat, fileIDType) 
        VALUES ('%s', '%s', %s, %s, 'afdbee13-eec5-4182-8c6c-f5638ee290f3');""" % (
        fileIDUUID, FidoFileID, validPreservationFormat, validAccessFormat)
    idsDone.append(FidoFileID)
    print sql
    if runSQLInserts:
        databaseInterface.runSQL(sql)

    FileIDsBySingleIDUUID = uuid.uuid4().__str__()
    sql = """INSERT INTO FileIDsBySingleID  (pk, fileID, id, tool, toolVersion)
        VALUES ('%s', '%s', '%s', 'Fido', '1.1.2');""" % (
        FileIDsBySingleIDUUID, fileIDUUID, FidoFileID)
    print sql

    if runSQLInserts:
        databaseInterface.runSQL(sql)

    printNewCommandRelationships(fileID, fileIDUUID)

    print
Ejemplo n.º 40
0
def printTikaInsert(itemdirectoryPath):
    global runSQLInserts
    ext = findExtension(itemdirectoryPath).lower()
    if not ext:
        return

    fileID = findExistingFileID(ext)
    if not fileID:
        return
    fileID, validPreservationFormat, validAccessFormat = fileID

    tikaFileID = getTikaID(itemdirectoryPath).strip()
    if not tikaFileID:
        return

    #check for existing rule
    sql = """SELECT pk FROM FileIDs WHERE fileIDType = '1d8f3bb3-da8a-4ef6-bac7-b65942df83fc' AND description = '%s';""" % tikaFileID
    if databaseInterface.queryAllSQL(sql):
        a = "skip"
        #return
    if tikaFileID in idsDone:
        return

    fileIDUUID = uuid.uuid4().__str__()

    sql = """INSERT INTO FileIDs (pk, description, validPreservationFormat, validAccessFormat, fileIDType) 
        VALUES ('%s', '%s', %s, %s, '1d8f3bb3-da8a-4ef6-bac7-b65942df83fc');""" % (
        fileIDUUID, tikaFileID, validPreservationFormat, validAccessFormat)
    idsDone.append(tikaFileID)
    print sql
    if runSQLInserts:
        databaseInterface.runSQL(sql)

    FileIDsBySingleIDUUID = uuid.uuid4().__str__()
    sql = """INSERT INTO FileIDsBySingleID  (pk, fileID, id, tool, toolVersion)
        VALUES ('%s', '%s', '%s', 'Tika', '1.3');""" % (FileIDsBySingleIDUUID,
                                                        fileIDUUID, tikaFileID)
    print sql

    if runSQLInserts:
        databaseInterface.runSQL(sql)

    printNewCommandRelationships(fileID, fileIDUUID)

    print
Ejemplo n.º 41
0
def createLoadMagic():
    """Write one PlantUML diagram per task config of taskType 3.

    For each row the module-level processedJobChainLinks list is reset, then
    the diagram header, title, chain-link text and footer are written.
    """
    global processedJobChainLinks
    sql = """SELECT TasksConfigs.description, StandardTasksConfigs.execute FROM TasksConfigs JOIN StandardTasksConfigs ON TasksConfigs.taskTypePKReference = StandardTasksConfigs.pk WHERE TasksConfigs.taskType = 3;"""
    for taskRow in databaseInterface.queryAllSQL(sql):
        description, chainLink = taskRow[0], taskRow[1]
        # Start every diagram with an empty record of processed links.
        processedJobChainLinks = []
        writePlant("@startuml LoadMagicLink-", description, "-", chainLink, ".png")
        writePlant("title ", description, "-", chainLink)
        startLabel = "(*) --> [" + description + "]"
        jobChainLinkTextGet("", startLabel, int(chainLink), label="")
        writePlant("@enduml")
Ejemplo n.º 42
0
def loadAllLinks():
    """Add a graph node per chain link, then an arrow to each default next link.

    Links whose task type is 5e70152a-9c5b-4c17-b823-c9298c546eeb are left out
    by the query. Nodes are named "{<pk>}<description>" and recorded in
    linkUUIDtoNodeName; links listed in excludedNodes get no node. Arrows are
    added in a second pass, after every node has been registered.
    """
    sql = """SELECT MicroServiceChainLinks.pk, MicroServiceChainLinks.defaultNextChainLink, TasksConfigs.description 
        FROM MicroServiceChainLinks 
        JOIN TasksConfigs ON currentTask = TasksConfigs.pk
        WHERE TasksConfigs.taskType != '5e70152a-9c5b-4c17-b823-c9298c546eeb';"""
    links = databaseInterface.queryAllSQL(sql)

    # First pass: register the nodes.
    for pk, _defaultNext, description in links:
        if pk in excludedNodes:
            continue
        nodeName = "{%s}%s" % (pk, description)
        G.add_node(nodeName, URL="MicroServiceChainLinks/%s" % pk, label=nodeName, id=nodeName)
        linkUUIDtoNodeName[pk] = nodeName

    # Second pass: connect each link to its default successor, if it has one.
    for pk, defaultNext, _description in links:
        if defaultNext is not None:
            addArrow(pk, defaultNext)
    return
Ejemplo n.º 43
0
def startThread(threadNumber):
    """Setup a gearman client, for the thread."""
    gm_worker = gearman.GearmanWorker(
        [config.get('MCPClient', "MCPArchivematicaServer")])
    hostID = gethostname() + "_" + threadNumber.__str__()
    gm_worker.set_client_id(hostID)
    for key in supportedModules.iterkeys():
        printOutputLock.acquire()
        print "registering:", '"' + key + '"'
        printOutputLock.release()
        gm_worker.register_task(key, executeCommand)

    #load transoder jobs
    sql = """SELECT CommandRelationships.pk 
                FROM CommandRelationships 
                JOIN Commands ON CommandRelationships.command = Commands.pk
                JOIN CommandsSupportedBy ON Commands.supportedBy = CommandsSupportedBy.pk 
                WHERE CommandsSupportedBy.description = 'supported by default archivematica client';"""
    rows = databaseInterface.queryAllSQL(sql)
    if rows:
        for row in rows:
            CommandRelationshipsPK = row[0]
            key = "transcoder_cr%s" % (CommandRelationshipsPK.__str__())
            printOutputLock.acquire()
            print "registering:", '"' + key + '"'
            printOutputLock.release()
            gm_worker.register_task(
                key, transcoderNormalizer.executeCommandReleationship)

    failMaxSleep = 30
    failSleep = 1
    failSleepIncrementor = 2
    while True:
        try:
            gm_worker.work()
        except gearman.errors.ServerUnavailable as inst:
            print >> sys.stderr, inst.args
            print >> sys.stderr, "Retrying in %d seconds." % (failSleep)
            time.sleep(failSleep)
            if failSleep < failMaxSleep:
                failSleep += failSleepIncrementor
Ejemplo n.º 44
0
 def setVariable(self, variable, variableValue, microServiceChainLink):
     """Create or update this unit's UnitVariables row(s) for variable.

     variable -- variable name matched together with self.unitType and self.UUID
     variableValue -- value to store; a false value is stored as ""
     microServiceChainLink -- stored quoted, or as SQL NULL when false

     Every existing matching row is updated; when there is none a new row
     with a generated UUID pk is inserted.
     """
     if not variableValue:
         variableValue = ""
     linkSQL = "'%s'" % (microServiceChainLink) if microServiceChainLink else "NULL"
     escapedValue = databaseInterface.MySQLdb.escape_string(variableValue)

     lookup = """SELECT pk FROM UnitVariables WHERE unitType = '%s' AND unitUUID = '%s' AND variable = '%s';""" % (
         self.unitType, self.UUID, variable)
     existing = databaseInterface.queryAllSQL(lookup)
     if not existing:
         insert = """INSERT INTO UnitVariables (pk, unitType, unitUUID, variable, variableValue, microserviceChainLink) VALUES ('%s', '%s', '%s', '%s', '%s', %s);""" % (
             str(uuid.uuid4()), self.unitType, self.UUID, variable,
             escapedValue, linkSQL)
         databaseInterface.runSQL(insert)
         return

     for existingRow in existing:
         update = """UPDATE UnitVariables SET variable='%s', variableValue='%s', microServiceChainLink=%s WHERE pk = '%s'; """ % (
             variable, escapedValue, linkSQL, existingRow[0])
         databaseInterface.runSQL(update)
Ejemplo n.º 45
0
def removePreservationFiles(SIPDirectory, SIPUUID):
    try:
        sql = """SELECT fileUUID, currentLocation FROM Files WHERE SIPUUID = '%s' AND removedTime = 0 AND fileGrpUse = 'preservation';""" % (
            SIPUUID)
        files = databaseInterface.queryAllSQL(sql)
        for file in files:
            try:
                fileUUID, currentLocation = file
                sql = """UPDATE Files SET removedTime=NOW() WHERE fileUUID = '%s';""" % (
                    fileUUID)
                databaseInterface.runSQL(sql)
                os.remove(
                    currentLocation.replace("%SIPDirectory%", SIPDirectory, 1))
            except Exception as inst:
                traceback.print_exc(file=sys.stdout)
                print type(inst)  # the exception instance
                print inst.args
    except Exception as inst:
        traceback.print_exc(file=sys.stdout)
        print type(inst)  # the exception instance
        print inst.args
Ejemplo n.º 46
0
def updateDirectoryLocation(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith):
    srcDB = src.replace(unitPath, unitPathReplaceWith)
    if not srcDB.endswith("/") and srcDB != unitPathReplaceWith:
        srcDB += "/"
    dstDB = dst.replace(unitPath, unitPathReplaceWith)
    if not dstDB.endswith("/") and dstDB != unitPathReplaceWith:
        dstDB += "/"
    sql = "SELECT Files.fileUUID, Files.currentLocation FROM Files WHERE removedTime = 0 AND Files.currentLocation LIKE '" + MySQLdb.escape_string(srcDB) + "%' AND " + unitIdentifierType + " = '" + unitIdentifier + "';"
    rows = databaseInterface.queryAllSQL(sql)
    for row in rows:
        fileUUID = row[0]
        location = row[1]
        destDB = location.replace(srcDB, dstDB) 
        sql =  """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(destDB), fileUUID)
        databaseInterface.runSQL(sql)
    if os.path.isdir(dst):
        if dst.endswith("/"):
            dst += "."
        else:
            dst += "/."
    print "moving: ", src, dst
    shutil.move(src, dst)
Ejemplo n.º 47
0
def findOrCreateSipInDB(path, waitSleep=dbWaitSleep):
    """Matches a directory to a database sip by it's appended UUID, or path. If it doesn't find one, it will create one"""
    UUID = ""
    path = path.replace(config.get('MCPServer', "sharedDirectory"),
                        "%sharedPath%", 1)

    #find UUID on end of SIP path
    uuidLen = -36
    if isUUID(path[uuidLen - 1:-1]):
        UUID = path[uuidLen - 1:-1]
        sql = """SELECT sipUUID FROM SIPs WHERE sipUUID = '""" + UUID + "';"
        rows = databaseInterface.queryAllSQL(sql)
        if not rows:
            databaseFunctions.createSIP(path, UUID=UUID)

    if UUID == "":
        #Find it in the database
        sql = """SELECT sipUUID FROM SIPs WHERE currentPath = '""" + MySQLdb.escape_string(
            path) + "';"
        #if waitSleep != 0:
        #time.sleep(waitSleep) #let db be updated by the microservice that moved it.
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        if not row:
            print "Not opening existing SIP:", UUID, "-", path
        while row != None:
            UUID = row[0]
            print "Opening existing SIP:", UUID, "-", path
            row = c.fetchone()
        sqlLock.release()

    #Create it
    if UUID == "":
        UUID = databaseFunctions.createSIP(path)
        print "DEBUG creating sip", path, UUID
    return UUID
Ejemplo n.º 48
0
def getTrimFileDmdSec(baseDirectoryPath, fileGroupIdentifier, fileUUID):
    """Build a Dublin Core dmdSec element for one file from its TRIM metadata.

    Looks up the single 'TRIM file metadata' file whose fileGrpUUID is
    fileUUID and copies its Document title, modification date and record
    number into dcterms title, date and identifier.

    baseDirectoryPath -- SIP directory; substituted for %SIPDirectory%
    fileGroupIdentifier -- value matched against Files.sipUUID
    Returns the dmdSec element, or None (after a message on stderr) unless
    exactly one metadata file is found.
    """
    dmdSec = etree.Element("dmdSec")
    mdWrap = etree.SubElement(dmdSec, "mdWrap")
    mdWrap.set("MDTYPE", "DC")
    xmlData = etree.SubElement(mdWrap, "xmlData")

    sql = "SELECT currentLocation FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse='TRIM file metadata' AND fileGrpUUID = '%s';" % ('sipUUID', fileGroupIdentifier, fileUUID)
    rows = databaseInterface.queryAllSQL(sql)
    if len(rows) != 1:
        print >>sys.stderr, "no metadata for original file: ", fileUUID
        return None

    # Exactly one row from here on.
    xmlFilePath = rows[0][0].replace('%SIPDirectory%', baseDirectoryPath, 1)
    dublincore = etree.SubElement(xmlData, "dublincore", attrib=None, nsmap={None:dctermsNS})
    document = etree.parse(os.path.join(baseDirectoryPath, xmlFilePath)).getroot()

    termSources = (
        ("title", "Document/TitleFreeTextPart"),
        ("date", "Document/DateModified"),
        ("identifier", "Document/RecordNumber"),
    )
    for termName, sourcePath in termSources:
        text = document.find(sourcePath).text
        etree.SubElement(dublincore, dctermsBNS + termName).text = text

    return dmdSec
Ejemplo n.º 49
0
def storeReport(content, type, name, UUID):
    """Insert one row into Reports for the given unit.

    content, type, name and UUID fill the content, unitType, unitName and
    unitIdentifier columns; they are handed to the database layer as query
    arguments rather than formatted into the statement.
    """
    insert = """INSERT INTO Reports (content, unitType, unitName, unitIdentifier) VALUES (%s, %s, %s, %s)"""
    values = (content, type, name, UUID)
    databaseInterface.queryAllSQL(insert, values)
Ejemplo n.º 50
0
def createFileSec(directoryPath, structMapDiv):
    """Add the files of one directory to the METS being built, then recurse.

    For every regular file directly inside directoryPath that has a matching
    Files row, this adds an "Item" div with an fptr to structMapDiv, works
    out a GROUPID from the file's fileGrpUse, and adds a file/FLocat entry to
    the matching element of globalFileGrps. Files are processed in the
    collated order of their original names. Subdirectories are handled last:
    each gets a "Directory" div under structMapDiv and a recursive call.

    Besides the globals declared below, the body reads these module-level
    names: baseDirectoryPath, baseDirectoryPathString, fileGroupType,
    fileGroupIdentifier, globalFileGrps and includeAmdSec.

    Problems (no Files row, no GROUPID, unknown use) are reported on stderr
    and counted in sharedVariablesAcrossModules.globalErrorCount.

    Returns None; all results are side effects on structMapDiv and globals.
    """
    global fileNameToFileID
    global trimStructMap
    global trimStructMapObjects
    global globalDmdSecCounter
    global globalAmdSecCounter
    global globalDigiprovMDCounter
    global dmdSecs
    global amdSecs

    delayed = []  # subdirectory names, recursed into after the files
    filesInThisDirectory = []  # <file> elements whose use is "original"
    dspaceMetsDMDID = None
    directoryContents = os.listdir(directoryPath)
    directoryContentsTuples = []  # (original name + "/", item) pairs used for sorting
    # First pass: defer directories and find the name each file should sort under.
    for item in directoryContents:
        itemdirectoryPath = os.path.join(directoryPath, item)
        if os.path.isdir(itemdirectoryPath):
            delayed.append(item)

        elif os.path.isfile(itemdirectoryPath):
            #find original file name
            directoryPathSTR = itemdirectoryPath.replace(
                baseDirectoryPath, baseDirectoryPathString, 1)
            sql = """SELECT Related.originalLocation AS 'derivedFromOriginalLocation', 
                            Current.originalLocation
                        FROM Files AS Current 
                        LEFT OUTER JOIN Derivations ON Current.fileUUID = Derivations.derivedFileUUID 
                        LEFT OUTER JOIN Files AS Related ON Derivations.sourceFileUUID = Related.fileUUID
                        WHERE Current.removedTime = 0 AND Current.%s = '%s' 
                            AND Current.currentLocation = '%s';""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(directoryPathSTR))
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            if row == None:
                print >> sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
                sharedVariablesAcrossModules.globalErrorCount += 1
                sqlLock.release()
                continue
            while row != None:
                #add to files in this directory tuple list
                derivedFromOriginalName = row[0]
                originalLocation = row[1]
                # A derived file sorts under the name of the file it was derived from.
                if derivedFromOriginalName != None:
                    originalLocation = derivedFromOriginalName
                originalName = os.path.basename(
                    originalLocation
                ) + u"/"  #+ u"/" keeps normalized after original / is very uncommon in a file name
                directoryContentsTuples.append((
                    originalName,
                    item,
                ))
                row = c.fetchone()
            # NOTE(review): the lock is not released if anything above raises.
            sqlLock.release()

    #order files by their original name
    for originalName, item in sorted(
            directoryContentsTuples,
            key=lambda listItems: listItems[0],
            cmp=sharedVariablesAcrossModules.collator.compare):
        #item = unicode(item)
        itemdirectoryPath = os.path.join(directoryPath, item)

        #myuuid = uuid.uuid4()
        myuuid = ""
        DMDIDS = ""
        #directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath + "objects", "objects", 1)
        directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath,
                                                     baseDirectoryPathString,
                                                     1)

        sql = """SELECT fileUUID, fileGrpUse, fileGrpUUID, Files.transferUUID, label, originalLocation, Transfers.type 
                FROM Files
                LEFT OUTER JOIN Transfers ON Files.transferUUID = Transfers.transferUUID
                WHERE removedTime = 0 AND %s = '%s' AND Files.currentLocation = '%s';""" % (
            fileGroupType, fileGroupIdentifier,
            MySQLdb.escape_string(directoryPathSTR))
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        if row == None:
            print >> sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
            sharedVariablesAcrossModules.globalErrorCount += 1
            sqlLock.release()
            continue
        # If several rows match, the values of the last one win.
        while row != None:
            myuuid = row[0]
            use = row[1]
            fileGrpUUID = row[2]
            transferUUID = row[3]
            label = row[4]
            originalLocation = row[5]
            typeOfTransfer = row[6]
            row = c.fetchone()
        sqlLock.release()

        filename = ''.join(quoteattr(item).split("\"")[1:-1])
        # From here on directoryPathSTR is the path with baseDirectoryPath removed.
        directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, "", 1)
        #print filename, directoryPathSTR

        # First TRIM file seen: create the logical structMap once, together
        # with the dmdSec and amdSec its "objects" div points at.
        if typeOfTransfer == "TRIM" and trimStructMap == None:
            trimStructMap = etree.Element("structMap",
                                          attrib={
                                              "TYPE": "logical",
                                              "ID": "structMap_2",
                                              "LABEL":
                                              "Hierarchical arrangement"
                                          })
            trimStructMapObjects = etree.SubElement(trimStructMap,
                                                    "div",
                                                    attrib={
                                                        "TYPE": "File",
                                                        "LABEL": "objects"
                                                    })

            trimDmdSec = getTrimDmdSec(baseDirectoryPath, fileGroupIdentifier)
            globalDmdSecCounter += 1
            dmdSecs.append(trimDmdSec)
            ID = "dmdSec_" + globalDmdSecCounter.__str__()
            trimDmdSec.set("ID", ID)
            trimStructMapObjects.set("DMDID", ID)

            # ==

            trimAmdSec = etree.Element("amdSec")
            globalAmdSecCounter += 1
            amdSecs.append(trimAmdSec)
            ID = "amdSec_" + globalAmdSecCounter.__str__()
            trimAmdSec.set("ID", ID)

            digiprovMD = getTrimAmdSec(baseDirectoryPath, fileGroupIdentifier)
            globalDigiprovMDCounter += 1
            digiprovMD.set("ID",
                           "digiprovMD_" + globalDigiprovMDCounter.__str__())

            trimAmdSec.append(digiprovMD)

            trimStructMapObjects.set("ADMID", ID)

        FILEID = "%s-%s" % (item, myuuid)
        # An ID starting with a digit gets an underscore prefix.
        if FILEID[0].isdigit():
            FILEID = "_" + FILEID

        #<fptr FILEID="file1-UUID"/>
        fileDiv = etree.SubElement(structMapDiv, "div")
        if label != None:
            fileDiv.set("LABEL", label)
        fileDiv.set("TYPE", "Item")
        newChild(fileDiv, "fptr", sets=[("FILEID", FILEID)])
        fileNameToFileID[item] = FILEID

        # Work out GROUPID (and possibly remap `use`) from the file's group use.
        GROUPID = ""
        if fileGrpUUID:
            GROUPID = "Group-%s" % (fileGrpUUID)
            if use == "TRIM file metadata":
                use = "metadata"

        elif use == "original" or use == "submissionDocumentation" or use == "metadata" or use == "maildirFile":
            GROUPID = "Group-%s" % (myuuid)
            if use == "maildirFile":
                use = "original"
            if use == "original":
                DMDIDS = createDMDIDSFromCSVParsedMetadataFiles(
                    originalLocation.replace('%transferDirectory%', "", 1))
                if DMDIDS:
                    fileDiv.set("DMDID", DMDIDS)
                if typeOfTransfer == "TRIM":
                    trimFileDiv = etree.SubElement(trimStructMapObjects,
                                                   "div",
                                                   attrib={"TYPE": "Item"})

                    # NOTE(review): if getTrimFileDmdSec returns None, the
                    # .set("ID", ...) call below raises AttributeError -
                    # confirm that cannot happen for TRIM originals.
                    trimFileDmdSec = getTrimFileDmdSec(baseDirectoryPath,
                                                       fileGroupIdentifier,
                                                       myuuid)
                    globalDmdSecCounter += 1
                    dmdSecs.append(trimFileDmdSec)
                    ID = "dmdSec_" + globalDmdSecCounter.__str__()
                    trimFileDmdSec.set("ID", ID)

                    trimFileDiv.set("DMDID", ID)

                    etree.SubElement(trimFileDiv,
                                     "fptr",
                                     attrib={"FILEID": FILEID})

        elif use == "preservation":
            # Group with the file this one was derived from (second column
            # of the Derivations row; column order is not visible here).
            sql = "SELECT * FROM Derivations WHERE derivedFileUUID = '" + myuuid + "';"
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                GROUPID = "Group-%s" % (row[1])
                row = c.fetchone()
            sqlLock.release()

        elif use == "license" or use == "text/ocr" or use == "DSPACEMETS":
            # Group with an original file located in the same original directory.
            sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse = 'original' AND originalLocation LIKE '%s/%%'""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(
                    os.path.dirname(originalLocation)).replace("%", "\%"))
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                GROUPID = "Group-%s" % (row[0])
                row = c.fetchone()
            sqlLock.release()

        elif use == "service":
            # Group with the original whose current path matches this file's
            # path (moved from objects/service/ to objects/) up to and
            # including the last ".".
            fileFileIDPath = itemdirectoryPath.replace(
                baseDirectoryPath + "objects/service/",
                baseDirectoryPathString + "objects/")
            objectNameExtensionIndex = fileFileIDPath.rfind(".")
            fileFileIDPath = fileFileIDPath[:objectNameExtensionIndex + 1]
            sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse = 'original' AND currentLocation LIKE '%s%%'""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(fileFileIDPath.replace("%", "\%")))
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                GROUPID = "Group-%s" % (row[0])
                row = c.fetchone()
            sqlLock.release()

        elif use == "TRIM container metadata":
            GROUPID = "Group-%s" % (myuuid)
            use = "metadata"

        if transferUUID:
            sql = "SELECT type FROM Transfers WHERE transferUUID = '%s';" % (
                transferUUID)
            rows = databaseInterface.queryAllSQL(sql)
            # NOTE(review): rows[0] assumes the Transfers row exists.
            if rows[0][0] == "Dspace":
                if use == "DSPACEMETS":
                    use = "submissionDocumentation"
                    admidApplyTo = None
                    if GROUPID == "":  #is an AIP identifier
                        GROUPID = myuuid
                        admidApplyTo = structMapDiv.getparent()

                    LABEL = "mets.xml-%s" % (GROUPID)
                    dmdSec, ID = createMDRefDMDSec(LABEL, itemdirectoryPath,
                                                   directoryPathSTR)
                    dmdSecs.append(dmdSec)
                    if admidApplyTo != None:
                        admidApplyTo.set("DMDID", ID)
                    else:
                        # Applied to this directory's originals after the loop.
                        dspaceMetsDMDID = ID

        if GROUPID == "":
            sharedVariablesAcrossModules.globalErrorCount += 1
            print >> sys.stderr, "No groupID for file: \"", directoryPathSTR, "\""

        if use not in globalFileGrps:
            print >> sys.stderr, "Invalid use: \"%s\"" % (use)
            sharedVariablesAcrossModules.globalErrorCount += 1
        else:
            file = newChild(globalFileGrps[use],
                            "file",
                            sets=[("ID", FILEID), ("GROUPID", GROUPID)])
            if use == "original":
                filesInThisDirectory.append(file)
            #<Flocat xlink:href="objects/file1-UUID" locType="other" otherLocType="system"/>
            Flocat = newChild(file,
                              "FLocat",
                              sets=[(xlinkBNS + "href", directoryPathSTR),
                                    ("LOCTYPE", "OTHER"),
                                    ("OTHERLOCTYPE", "SYSTEM")])
            if includeAmdSec:
                AMD, ADMID = getAMDSec(myuuid, directoryPathSTR, use,
                                       fileGroupType, fileGroupIdentifier,
                                       transferUUID, itemdirectoryPath,
                                       typeOfTransfer)
                amdSecs.append(AMD)
                file.set("ADMID", ADMID)

    # A DSpace mets.xml found in this directory describes all of its originals.
    if dspaceMetsDMDID != None:
        for file in filesInThisDirectory:
            file.set("DMDID", dspaceMetsDMDID)

    # Finally descend into the subdirectories, in collated name order.
    for item in sorted(delayed,
                       cmp=sharedVariablesAcrossModules.collator.compare):
        itemdirectoryPath = os.path.join(directoryPath, item)
        directoryDiv = newChild(structMapDiv,
                                "div",
                                sets=[("TYPE", "Directory"), ("LABEL", item)])
        DMDIDS = createDMDIDSFromCSVParsedMetadataDirectories(
            itemdirectoryPath.replace(baseDirectoryPath, "", 1))
        if DMDIDS:
            directoryDiv.set("DMDID", DMDIDS)
        createFileSec(itemdirectoryPath, directoryDiv)
Ejemplo n.º 51
0
# @package Archivematica
# @subpackage archivematicaClientScript
# @author Joseph Perry <*****@*****.**>

import os
import sys
import uuid
import shutil
sys.path.append("/usr/lib/archivematica/archivematicaCommon")
import databaseInterface

if __name__ == '__main__':
    unitUUID = sys.argv[1]
    filePath = sys.argv[2]
    
    uuidLen = 36
    basename = os.path.basename(filePath)
    fileFauxUUID = basename[:uuidLen]
    fileName = basename[uuidLen:]
    dirname = os.path.dirname(filePath)
    
    sql = """SELECT fileUUID FROM FauxFileIDsMap WHERE fauxSIPUUID='%s' AND fauxFileUUID='%s';""" % (unitUUID, fileFauxUUID)
    rows = databaseInterface.queryAllSQL(sql)
    if len(rows) != 1:
        print >>sys.stderr, "Wrong rows returned", sql, rows
        exit(-1)
    originalFileUUID = rows[0][0]
    
    dst = os.path.join(dirname, originalFileUUID + fileName)
    print basename, " -> ", originalFileUUID + fileName
    shutil.move(filePath, dst)
def index_transfer_files(conn, uuid, pathToTransfer, index, type):
    """Index every file found under pathToTransfer; return how many were indexed.

    conn            -- object providing index(doc, index, type, bulk=True)
                       and refresh()
    uuid            -- transfer UUID; used to look up the accession ID and
                       the per-file UUIDs, and stored as 'sipuuid'
    pathToTransfer  -- directory passed to list_files_in_dir()
    index, type     -- forwarded unchanged to conn.index()

    One document is sent per regular file. A file with no matching Files row
    is still indexed, with an empty 'fileuuid'. conn.refresh() is called only
    when at least one document was sent.
    """
    filesIndexed = 0
    ingest_date = str(datetime.datetime.today())[0:10]
    create_time = time.time()

    # The transfer UUID is embedded in every query below; escape it once.
    escaped_uuid = MySQLdb.escape_string(uuid)

    # get accessionId from transfers table using UUID
    accession_id = ''
    sql = "SELECT accessionID from Transfers WHERE transferUUID='" + escaped_uuid + "'"
    rows = databaseInterface.queryAllSQL(sql)
    if len(rows) > 0:
        accession_id = rows[0][0]

    for filepath in list_files_in_dir(pathToTransfer):
        if not os.path.isfile(filepath):
            continue

        # Strip only the first occurrence of the transfer path: the same text
        # may legitimately appear again deeper inside the directory tree.
        relative_path = '%transferDirectory%objects' + filepath.replace(pathToTransfer, '', 1)

        sql = "SELECT fileUUID FROM Files WHERE currentLocation='" + MySQLdb.escape_string(relative_path) + "' AND transferUUID='" + escaped_uuid + "'"
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows) > 0:
            file_uuid = rows[0][0]
        else:
            file_uuid = ''

        indexData = {
          'filepath'     : filepath,
          'filename'     : os.path.basename(filepath),
          'fileuuid'     : file_uuid,
          'sipuuid'      : uuid,
          'accessionid'  : accession_id,
          'status'       : '',
          'origin'       : getDashboardUUID(),
          'ingestdate'   : ingest_date,
          'created'      : create_time
        }

        # Extension is stored lower-cased and without the leading dot.
        fileExtension = os.path.splitext(filepath)[1]
        if fileExtension != '':
            indexData['fileExtension'] = fileExtension[1:].lower()

        conn.index(indexData, index, type, bulk=True)

        filesIndexed = filesIndexed + 1

    if filesIndexed > 0:
        conn.refresh()

    return filesIndexed
Ejemplo n.º 53
0
def createDigiprovMD(fileUUID):
    """Return a list of digiprovMD elements, one per Events row of fileUUID.

    Each element wraps a PREMIS event built from the row (identifier, type,
    date/time, detail, outcome and outcome note). When the row carries a
    linkingAgentIdentifier it is added as an "Archivematica user pk" agent,
    and every row of the Agents table is added as a further
    linkingAgentIdentifier.

    Every element consumes one value of the global globalDigiprovMDCounter
    for its ID. Returns an empty list when the file has no events.
    """
    global globalDigiprovMDCounter
    ret = []
    #EVENTS

    sql = "SELECT pk, fileUUID, eventIdentifierUUID, eventType, eventDateTime, eventDetail, eventOutcome, eventOutcomeDetailNote, linkingAgentIdentifier FROM Events WHERE fileUUID = '" + fileUUID + "';"
    rows = databaseInterface.queryAllSQL(sql)

    # The agent list is the same for every event, so read it once, and only
    # when there is an event to attach it to. The rows are copied out and the
    # lock released (even on error) before any XML is built, so a failure
    # while building elements can no longer leave the lock held.
    agents = []
    if rows:
        agentsSql = """SELECT agentIdentifierType, agentIdentifierValue, agentName, agentType FROM Agents;"""
        c, sqlLock = databaseInterface.querySQL(agentsSql)
        try:
            agentRow = c.fetchone()
            while agentRow is not None:
                agents.append(agentRow)
                agentRow = c.fetchone()
        finally:
            sqlLock.release()

    for row in rows:
        digiprovMD = etree.Element("digiprovMD")
        ret.append(digiprovMD)  #newChild(amdSec, "digiprovMD")
        #digiprovMD.set("ID", "digiprov-"+ os.path.basename(filename) + "-" + fileUUID)
        globalDigiprovMDCounter += 1
        digiprovMD.set("ID", "digiprovMD_" + globalDigiprovMDCounter.__str__())

        mdWrap = newChild(digiprovMD, "mdWrap")
        mdWrap.set("MDTYPE", "PREMIS:EVENT")
        xmlData = newChild(mdWrap, "xmlData")
        event = etree.SubElement(xmlData, "event", nsmap={None: premisNS})
        event.set(
            xsiBNS + "schemaLocation", premisNS +
            " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
        event.set("version", "2.2")

        eventIdentifier = etree.SubElement(event, "eventIdentifier")
        etree.SubElement(eventIdentifier, "eventIdentifierType").text = "UUID"
        etree.SubElement(eventIdentifier, "eventIdentifierValue").text = row[2]

        etree.SubElement(event, "eventType").text = row[3]
        # Swap the space between date and time for a "T".
        etree.SubElement(event,
                         "eventDateTime").text = row[4].__str__().replace(
                             " ", "T")
        etree.SubElement(event, "eventDetail").text = escape(row[5])

        eventOutcomeInformation = etree.SubElement(event,
                                                   "eventOutcomeInformation")
        etree.SubElement(eventOutcomeInformation, "eventOutcome").text = row[6]
        eventOutcomeDetail = etree.SubElement(eventOutcomeInformation,
                                              "eventOutcomeDetail")
        etree.SubElement(eventOutcomeDetail,
                         "eventOutcomeDetailNote").text = escape(row[7])

        if row[8]:
            linkingAgentIdentifier = etree.SubElement(
                event, "linkingAgentIdentifier")
            etree.SubElement(
                linkingAgentIdentifier,
                "linkingAgentIdentifierType").text = "Archivematica user pk"
            etree.SubElement(
                linkingAgentIdentifier,
                "linkingAgentIdentifierValue").text = row[8].__str__()

        #linkingAgentIdentifier
        for agentRow in agents:
            linkingAgentIdentifier = etree.SubElement(
                event, "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier,
                             "linkingAgentIdentifierType").text = agentRow[0]
            etree.SubElement(linkingAgentIdentifier,
                             "linkingAgentIdentifierValue").text = agentRow[1]
    return ret
Ejemplo n.º 54
0
def identifyCommands(fileName):
    """Identify file type(s)"""
    ret = []
    premisFile = opts.logsDirectory + "fileMeta/" + opts.fileUUID + ".xml"
    try:
        for pronomID in getPronomsFromPremis(premisFile):
            sql = """SELECT CR.pk, CR.command, CR.GroupMember
            FROM CommandRelationships AS CR
            JOIN FileIDs ON CR.fileID=FileIDs.pk
            JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk
            JOIN FileIDsByPronom AS FIBP  ON FileIDs.pk = FIBP.FileIDs
            WHERE FIBP.FileID = '""" + pronomID.__str__() + """'
            AND CommandClassifications.classification = '""" + opts.commandClassifications + """';"""
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                ret.append(row)
                row = c.fetchone()
            sqlLock.release()
    except:
        print >> sys.stderr, "Failed to retrieve pronomIDs."
        ret = []

    if transcoder.fileExtension:
        sql = """SELECT CR.pk, CR.command, CR.GroupMember
        FROM CommandRelationships AS CR
        JOIN FileIDs ON CR.fileID=FileIDs.pk
        JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk
        JOIN FileIDsByExtension AS FIBE  ON FileIDs.pk = FIBE.FileIDs
        WHERE FIBE.Extension = '""" + transcoder.fileExtension.__str__() + """'
        AND CommandClassifications.classification = '""" + opts.commandClassifications + """';"""
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            ret.append(row)
            row = c.fetchone()
        sqlLock.release()

    if not len(ret):
        if opts.commandClassifications == "preservation":
            if inPreservationFormat():
                print "Already in preservation format."
            else:
                print >> sys.stderr, "Unable to verify archival readiness."
                #Issue 528: related to exit code
                exit(0)

        elif opts.commandClassifications == "access":
            sql = """SELECT CR.pk, CR.command, CR.GroupMember
            FROM CommandRelationships AS CR
            JOIN Commands AS C ON CR.command = C.pk
            WHERE C.description = 'Copying file to access directory.';"""
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                cl = transcoder.CommandLinker(row)
                copyExitCode = cl.execute()
                if copyExitCode:
                    exit(copyExitCode)
            if inAccessFormat():
                print "Already in access format."
                exit(0)
            else:
                print >> sys.stderr, "Unable to verify access readiness."
                #Issue 528: related to exit code
                exit(0)

        elif opts.commandClassifications == "thumbnail":
            #use default thumbnail
            print "Using default thumbnail"
            sql = """SELECT CR.pk, CR.command, CR.GroupMember
            FROM CommandRelationships AS CR
            JOIN Commands AS C ON CR.command = C.pk
            WHERE C.description = 'Using default thumbnail.';"""
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                cl = transcoder.CommandLinker(row)
                copyExitCode = cl.execute()
                exit(copyExitCode)

    return ret
Ejemplo n.º 55
0
def process_transfer(request, transfer_uuid):
    """Turn a transfer's objects into a new SIP and answer with JSON.

    For a logged-in user: creates a structured temporary SIP directory under
    the processing directory, registers a new SIP row, moves everything from
    the transfer's objects/ directory into it, re-points the matching Files
    rows at the SIP, copies processingMCP.xml, moves the temporary directory
    to watchedDirectories/SIPCreation/SIPsUnderConstruction and clears the
    transfer's file status in the search index. The response then carries
    {'message': 'SIP <uuid> created.'}.

    For an anonymous request nothing is touched and the response carries
    {'error': True, 'message': 'Must be logged in.'}.
    """
    response = {}

    # Guard clause: refuse anonymous requests before doing any work.
    if not request.user.id:
        response['error'] = True
        response['message'] = 'Must be logged in.'
        return HttpResponse(
            simplejson.JSONEncoder(encoding='utf-8').encode(response),
            mimetype='application/json')

    # get transfer info
    transfer = models.Transfer.objects.get(uuid=transfer_uuid)
    stored_location = transfer.currentlocation
    transfer_path = stored_location.replace(
        '%sharedPath%', helpers.get_server_config_value('sharedDirectory'))

    import MySQLdb
    import databaseInterface
    import databaseFunctions
    import shutil

    from archivematicaCreateStructuredDirectory import createStructuredDirectory, createManualNormalizedDirectoriesList
    createStructuredDirectory(transfer_path,
                              createManualNormalizedDirectories=False)

    processing_directory = helpers.get_server_config_value(
        'processingDirectory')
    # The directory name ends with 37 characters that are not part of the
    # transfer name.
    transfer_name = os.path.basename(transfer_path[:-1])[:-37]
    shared_path = helpers.get_server_config_value('sharedDirectory')

    tmp_sip_dir = os.path.join(processing_directory, transfer_name) + "/"
    process_sip_directory = os.path.join(
        shared_path,
        'watchedDirectories/SIPCreation/SIPsUnderConstruction') + '/'
    createStructuredDirectory(tmp_sip_dir,
                              createManualNormalizedDirectories=False)
    objects_directory = os.path.join(transfer_path, 'objects') + '/'

    # An earlier revision first looked for an existing SIPs row with this
    # currentPath; a fresh SIP row is now always created.
    sip_uuid = str(uuid.uuid4())
    dest_sip_dir = os.path.join(process_sip_directory, transfer_name) + "/"
    lookup_path = dest_sip_dir.replace(shared_path, '%sharedPath%')
    databaseFunctions.createSIP(lookup_path, sip_uuid)

    #move the objects to the SIPDir
    for entry in os.listdir(objects_directory):
        shutil.move(os.path.join(objects_directory, entry),
                    os.path.join(tmp_sip_dir, "objects", entry))

    #get the database list of files in the objects directory
    #for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
    select_sql = """SELECT  fileUUID, currentLocation FROM Files WHERE removedTime = 0 AND currentLocation LIKE '\%transferDirectory\%objects%' AND transferUUID =  '""" + transfer_uuid + "'"
    for file_uuid, stored_path in databaseInterface.queryAllSQL(select_sql):
        current_path = databaseFunctions.deUnicode(stored_path)
        path_in_sip = current_path.replace("%transferDirectory%", tmp_sip_dir)
        if not os.path.isfile(path_in_sip):
            print >> sys.stderr, "file not found: ", path_in_sip
            continue
        new_location = current_path.replace("%transferDirectory%",
                                            "%SIPDirectory%")
        update_sql = """UPDATE Files SET currentLocation='%s', sipUUID='%s' WHERE fileUUID='%s'""" % (
            MySQLdb.escape_string(new_location), sip_uuid, file_uuid)
        databaseInterface.runSQL(update_sql)

    #copy processingMCP.xml file
    mcp_source = os.path.join(os.path.dirname(objects_directory[:-1]),
                              "processingMCP.xml")
    mcp_target = os.path.join(tmp_sip_dir, "processingMCP.xml")
    shutil.copy(mcp_source, mcp_target)

    #moveSIPTo processSIPDirectory
    shutil.move(tmp_sip_dir, dest_sip_dir)

    elasticSearchFunctions.connect_and_change_transfer_file_status(
        transfer_uuid, '')

    response['message'] = 'SIP ' + sip_uuid + ' created.'

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json')
Ejemplo n.º 56
0
    objectsDirectory = sys.argv[1]
    transferName = sys.argv[2]
    transferUUID = sys.argv[3]
    processingDirectory = sys.argv[4]
    autoProcessSIPDirectory = sys.argv[5]
    sharedPath = sys.argv[6]
    sipName = transferName

    tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
    destSIPDir =  os.path.join(autoProcessSIPDirectory, sipName) + "/"
    createStructuredDirectory(tmpSIPDir, createManualNormalizedDirectories=False)

    #create row in SIPs table if one doesn't already exist
    lookup_path = destSIPDir.replace(sharedPath, '%sharedPath%')
    sql = """SELECT sipUUID FROM SIPs WHERE currentPath = '""" + MySQLdb.escape_string(lookup_path) + "';"
    rows = databaseInterface.queryAllSQL(sql)
    if len(rows) > 0:
        row = rows[0]
        sipUUID = row[0]
    else:
        sipUUID = uuid.uuid4().__str__()
        databaseFunctions.createSIP(lookup_path, sipUUID)

    #move the objects to the SIPDir
    for item in os.listdir(objectsDirectory):
        shutil.move(os.path.join(objectsDirectory, item), os.path.join(tmpSIPDir, "objects", item))

    #get the database list of files in the objects directory
    #for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
    sql = """SELECT  fileUUID, currentLocation FROM Files WHERE removedTime = 0 AND currentLocation LIKE '\%transferDirectory\%objects%' AND transferUUID =  '""" + transferUUID + "'"
    for row in databaseInterface.queryAllSQL(sql):
def _rightsAddText(parent, tag, text):
    """Append a child element named ``tag`` to ``parent`` and set its text."""
    child = etree.SubElement(parent, tag)
    child.text = text
    return child


def _rightsAddApplicableDates(datesElement, startDate, endDate, endDateOpen):
    """Append startDate/endDate children to an ...ApplicableDates element.

    An open-ended end date is written as the literal text "OPEN" and takes
    precedence over a concrete end date.
    """
    if startDate:
        _rightsAddText(datesElement, "startDate", formatDate(startDate))
    if endDateOpen:
        _rightsAddText(datesElement, "endDate", "OPEN")
    elif endDate:
        _rightsAddText(datesElement, "endDate", formatDate(endDate))


def _rightsAddCopyrightInformation(rightsStatement, valueDic):
    """Append one copyrightInformation element per RightsStatementCopyright row."""
    sql = """SELECT pk, copyrightStatus, copyrightJurisdiction, copyrightStatusDeterminationDate, copyrightApplicableStartDate, copyrightApplicableEndDate, copyrightApplicableEndDateOpen FROM RightsStatementCopyright WHERE fkRightsStatement = %d""" % (
        valueDic["RightsStatement.pk"])
    for row2 in databaseInterface.queryAllSQL(sql) or ():
        copyrightInformation = etree.SubElement(rightsStatement,
                                                "copyrightInformation")
        _rightsAddText(copyrightInformation, "copyrightStatus",
                       valueDic["copyrightStatus"])

        # Prefer the country code when the jurisdiction is a known country
        # name; otherwise the stored value is written unchanged.
        copyrightJurisdiction = valueDic["copyrightJurisdiction"]
        copyrightJurisdictionCode = getCodeForCountry(
            str(copyrightJurisdiction).upper())
        if copyrightJurisdictionCode is not None:
            copyrightJurisdiction = copyrightJurisdictionCode
        _rightsAddText(copyrightInformation, "copyrightJurisdiction",
                       copyrightJurisdiction)
        _rightsAddText(
            copyrightInformation, "copyrightStatusDeterminationDate",
            formatDate(valueDic["copyrightStatusDeterminationDate"]))

        # copyrightNote is repeatable.
        sql = "SELECT copyrightNote FROM RightsStatementCopyrightNote WHERE fkRightsStatementCopyrightInformation = %d;" % (
            row2[0])
        for row3 in databaseInterface.queryAllSQL(sql) or ():
            _rightsAddText(copyrightInformation, "copyrightNote", row3[0])

        # RightsStatementCopyrightDocumentationIdentifier
        getDocumentationIdentifier(valueDic["RightsStatement.pk"],
                                   copyrightInformation)

        # The dates element is always emitted for copyright, even if empty.
        copyrightApplicableDates = etree.SubElement(
            copyrightInformation, "copyrightApplicableDates")
        _rightsAddApplicableDates(copyrightApplicableDates,
                                  valueDic["copyrightApplicableStartDate"],
                                  valueDic["copyrightApplicableEndDate"],
                                  row2[6])  # copyrightApplicableEndDateOpen


def _rightsAddLicenseInformation(rightsStatement, valueDic):
    """Append one licenseInformation element per license/documentation row."""
    sql = ("SELECT licenseTerms, licenseApplicableStartDate, "
           "licenseApplicableEndDate, licenseDocumentationIdentifierType, "
           "licenseDocumentationIdentifierValue, RightsStatementLicense.pk, "
           "licenseDocumentationIdentifierRole, licenseApplicableEndDateOpen "
           "FROM RightsStatementLicense "
           "JOIN RightsStatementLicenseDocumentationIdentifier ON "
           "RightsStatementLicenseDocumentationIdentifier.fkRightsStatementLicense = RightsStatementLicense.pk "
           "WHERE RightsStatementLicense.fkRightsStatement = %d;") % (
               valueDic["RightsStatement.pk"])
    for row2 in databaseInterface.queryAllSQL(sql) or ():
        licenseInformation = etree.SubElement(rightsStatement,
                                              "licenseInformation")

        licenseDocumentIdentifier = etree.SubElement(
            licenseInformation, "licenseDocumentationIdentifier")
        _rightsAddText(licenseDocumentIdentifier,
                       "licenseDocumentationIdentifierType", row2[3])
        _rightsAddText(licenseDocumentIdentifier,
                       "licenseDocumentationIdentifierValue", row2[4])
        _rightsAddText(licenseDocumentIdentifier,
                       "licenseDocumentationRole", row2[6])

        _rightsAddText(licenseInformation, "licenseTerms",
                       valueDic["licenseTerms"])

        # licenseNote is repeatable.
        sql = "SELECT licenseNote FROM RightsStatementLicenseNote WHERE fkRightsStatementLicense = %d;" % (
            row2[5])
        for row3 in databaseInterface.queryAllSQL(sql) or ():
            _rightsAddText(licenseInformation, "licenseNote", row3[0])

        # The dates element is always emitted for licenses, even if empty.
        licenseApplicableDates = etree.SubElement(licenseInformation,
                                                  "licenseApplicableDates")
        _rightsAddApplicableDates(licenseApplicableDates,
                                  valueDic["licenseApplicableStartDate"],
                                  valueDic["licenseApplicableEndDate"],
                                  row2[7])  # licenseApplicableEndDateOpen


def _rightsAddOtherRightsInformation(rightsStatement, valueDic,
                                     rightsBasisActuallyOther):
    """Append a single otherRightsInformation element covering all DB rows."""
    # One container element is shared by every
    # RightsStatementOtherRightsInformation row of this statement.
    otherRightsInformation = etree.SubElement(rightsStatement,
                                              "otherRightsInformation")
    sql = """SELECT pk, otherRightsBasis, otherRightsApplicableStartDate, otherRightsApplicableEndDate, otherRightsApplicableEndDateOpen FROM RightsStatementOtherRightsInformation WHERE RightsStatementOtherRightsInformation.fkRightsStatement = %d;""" % (
        valueDic["RightsStatement.pk"])
    for row2 in databaseInterface.queryAllSQL(sql) or ():
        (otherRightsPK, otherRightsBasis, otherRightsApplicableStartDate,
         otherRightsApplicableEndDate,
         otherRightsApplicableEndDateOpen) = row2

        # otherRightsDocumentationIdentifier
        sql = """SELECT otherRightsDocumentationIdentifierType, otherRightsDocumentationIdentifierValue, otherRightsDocumentationIdentifierRole FROM RightsStatementOtherRightsDocumentationIdentifier WHERE fkRightsStatementotherRightsInformation = %s """ % (
            otherRightsPK)
        for row3 in databaseInterface.queryAllSQL(sql) or ():
            otherRightsDocumentationIdentifier = etree.SubElement(
                otherRightsInformation, "otherRightsDocumentationIdentifier")
            _rightsAddText(otherRightsDocumentationIdentifier,
                           "otherRightsDocumentationIdentifierType", row3[0])
            _rightsAddText(otherRightsDocumentationIdentifier,
                           "otherRightsDocumentationIdentifierValue", row3[1])
            _rightsAddText(otherRightsDocumentationIdentifier,
                           "otherRightsDocumentationRole", row3[2])

        # "Policy"/"Donor" statements were written with rightsBasis "Other",
        # so the real basis is carried here instead of the stored value.
        if not otherRightsBasis or valueDic[
                "rightsBasis"] in rightsBasisActuallyOther:
            otherRightsBasis = valueDic["rightsBasis"]
        _rightsAddText(otherRightsInformation, "otherRightsBasis",
                       otherRightsBasis)

        # Unlike copyright/license, the dates element is only emitted when a
        # start or end date is actually set.
        if otherRightsApplicableStartDate or otherRightsApplicableEndDate:
            otherRightsApplicableDates = etree.SubElement(
                otherRightsInformation, "otherRightsApplicableDates")
            _rightsAddApplicableDates(otherRightsApplicableDates,
                                      otherRightsApplicableStartDate,
                                      otherRightsApplicableEndDate,
                                      otherRightsApplicableEndDateOpen)

        # otherRightsNote is repeatable.
        sql = "SELECT otherRightsNote FROM RightsStatementOtherRightsNote WHERE fkRightsStatementOtherRightsInformation = %d;" % (
            otherRightsPK)
        for row3 in databaseInterface.queryAllSQL(sql) or ():
            _rightsAddText(otherRightsInformation, "otherRightsNote", row3[0])


def archivematicaGetRights(metadataAppliesToList, fileUUID):
    """Build PREMIS rightsStatement elements for the given identifiers.

    metadataAppliesToList -- iterable of (identifier, type) pairs, e.g.
        [(fileUUID, fileUUIDTYPE), (sipUUID, sipUUIDTYPE),
         (transferUUID, transferUUIDType)]
    fileUUID -- written as the linkingObjectIdentifierValue of every
        statement produced.

    Returns a list with one ``rightsStatement`` element per row found in
    RightsStatement (left-joined with its copyright and license tables) for
    each pair; pairs with no rows contribute nothing.
    """
    ret = []
    rightsBasisActuallyOther = ["Policy", "Donor"]
    keys = [
        "RightsStatement.pk",
        "rightsStatementIdentifierType",
        "rightsStatementIdentifierValue",
        "rightsBasis",
        "copyrightStatus",
        "copyrightJurisdiction",
        "copyrightStatusDeterminationDate",
        "licenseTerms",
        "copyrightApplicableStartDate",
        "copyrightApplicableEndDate",
        "licenseApplicableStartDate",
        "licenseApplicableEndDate",
    ]
    columns = ", ".join(keys)
    for metadataAppliesToidentifier, metadataAppliesToType in metadataAppliesToList:
        # NOTE(review): values are interpolated straight into the SQL text;
        # callers presumably only pass internally generated identifiers.
        sql = """SELECT %s FROM RightsStatement LEFT JOIN RightsStatementCopyright ON RightsStatementCopyright.fkRightsStatement = RightsStatement.pk LEFT JOIN RightsStatementLicense ON RightsStatementLicense.fkRightsStatement = RightsStatement.pk WHERE metadataAppliesToidentifier = '%s' AND metadataAppliesToType = '%s';""" % (
            columns, metadataAppliesToidentifier, metadataAppliesToType)
        for row in databaseInterface.queryAllSQL(sql) or ():
            valueDic = dict(zip(keys, row))
            rightsStatementPK = valueDic["RightsStatement.pk"]

            rightsStatement = etree.Element("rightsStatement",
                                            nsmap={None: premisNS})
            rightsStatement.set(
                xsiBNS + "schemaLocation", premisNS +
                " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
            # No "version" attribute: cvc-complex-type.3.2.2 -- attribute
            # 'version' is not allowed to appear in element 'rightsStatement'.
            ret.append(rightsStatement)

            # Fall back to a freshly generated UUID identifier when the
            # statement has no identifier value of its own.
            if valueDic["rightsStatementIdentifierValue"]:
                identifierType = valueDic["rightsStatementIdentifierType"]
                identifierValue = valueDic["rightsStatementIdentifierValue"]
            else:
                identifierType = "UUID"
                identifierValue = str(uuid.uuid4())
            rightsStatementIdentifier = etree.SubElement(
                rightsStatement, "rightsStatementIdentifier")
            _rightsAddText(rightsStatementIdentifier,
                           "rightsStatementIdentifierType", identifierType)
            _rightsAddText(rightsStatementIdentifier,
                           "rightsStatementIdentifierValue", identifierValue)

            rightsBasis = valueDic["rightsBasis"]
            if rightsBasis in rightsBasisActuallyOther:
                _rightsAddText(rightsStatement, "rightsBasis", "Other")
            else:
                _rightsAddText(rightsStatement, "rightsBasis", rightsBasis)

            # A missing basis selects none of the basis-specific sections
            # instead of raising AttributeError on None.lower().
            basis = (rightsBasis or "").lower()
            if basis == "copyright":
                _rightsAddCopyrightInformation(rightsStatement, valueDic)
            elif basis == "license":
                _rightsAddLicenseInformation(rightsStatement, valueDic)
            elif basis == "statute":
                # 4.1.5 statuteInformation (O, R)
                getstatuteInformation(rightsStatementPK, rightsStatement)
            elif basis in ("donor", "policy", "other"):
                _rightsAddOtherRightsInformation(rightsStatement, valueDic,
                                                 rightsBasisActuallyOther)

            # 4.1.6 rightsGranted (O, R)
            getrightsGranted(rightsStatementPK, rightsStatement)

            # 4.1.7 linkingObjectIdentifier (O, R)
            linkingObjectIdentifier = etree.SubElement(
                rightsStatement, "linkingObjectIdentifier")
            _rightsAddText(linkingObjectIdentifier,
                           "linkingObjectIdentifierType", "UUID")
            _rightsAddText(linkingObjectIdentifier,
                           "linkingObjectIdentifierValue", fileUUID)
    return ret
Ejemplo n.º 58
0
    def alreadyNormalizedManually(self, unit, CommandClassification):
        """ Return True if file was normalized manually, False if not.

        Checks by looking for access/preservation files for a given original
        file.

        Check the manualNormalization/access and manualNormalization/preservation
        directories for access and preservation files.  If a normalization.csv
        file is specified, check there first for the mapping between original
        file and access/preservation file.

        unit -- the file unit; its UUID-bearing owningUnit supplies the SIP
            UUID and SIP path, and its currentPath the file location.
        CommandClassification -- "preservation" or "access"; any other value
            yields False.

        Returns False as well when normalization.csv cannot be parsed or the
        database lookup raises.
        """
        # Setup
        SIPUUID = unit.owningUnit.UUID
        filePath = unit.currentPath
        bname = os.path.basename(filePath)
        dirName = os.path.dirname(filePath)
        # If normalization.csv provided, check there for mapping from original
        # to access/preservation file
        SIPPath = unit.owningUnit.currentPath.replace(
            "%sharedPath%",
            archivematicaMCP.config.get('MCPServer', "sharedDirectory", 1))
        normalization_csv = os.path.join(SIPPath, "objects",
                                         "manualNormalization",
                                         "normalization.csv")
        if os.path.isfile(normalization_csv):
            found = False
            access = preservation = None
            with open(normalization_csv, 'rb') as csv_file:
                reader = csv.reader(csv_file)
                # Search the file for an original filename that matches the one provided
                try:
                    for row in reader:
                        # Skip blank lines (csv yields [] for them, which
                        # would make row[0] raise) and comment lines.
                        if not row or "#" in row[0]:
                            continue
                        original, access, preservation = row
                        if original.lower() == bname.lower():
                            found = True
                            break
                except csv.Error:
                    sys.stderr.write(
                        "Error reading {filename} on line {linenum}".format(
                            filename=normalization_csv,
                            linenum=reader.line_num) + "\n")
                    return False  # how indicate error?

            # If we didn't find a match, let it fall through to the usual method
            if found:
                if CommandClassification == "preservation":
                    filename = preservation
                elif CommandClassification == "access":
                    filename = access
                else:
                    return False
                # No manually normalized file for command classification
                if not filename:
                    return False

                # If we found a match, verify access/preservation exists in DB
                # match and pull original location b/c sanitization
                # NOTE(review): filename comes from the user-supplied CSV and
                # is interpolated unescaped; quotes or LIKE wildcards in it
                # alter the query. Escape/parameterize once the
                # databaseInterface API for that is confirmed.
                sql = """SELECT Files.fileUUID, Files.currentLocation 
                         FROM Files 
                         WHERE sipUUID = '{SIPUUID}' AND 
                            originalLocation LIKE '%{filename}' AND 
                            removedTime = 0;""".format(SIPUUID=SIPUUID,
                                                       filename=filename)
                rows = databaseInterface.queryAllSQL(sql)
                return bool(rows)

        # Assume that any access/preservation file found with the right
        # name is the correct one
        manualNormalizationDirs = {
            "preservation":
            "%SIPDirectory%objects/manualNormalization/preservation/",
            "access": "%SIPDirectory%objects/manualNormalization/access/",
        }
        if CommandClassification not in manualNormalizationDirs:
            return False
        bname = os.path.splitext(bname)[0]
        path = os.path.join(dirName, bname).replace(
            "%SIPDirectory%objects/",
            manualNormalizationDirs[CommandClassification])
        try:
            # Literal '%' characters of the path are backslash-escaped so that
            # only the trailing '%' acts as a LIKE wildcard.
            sql = """SELECT fileUUID FROM Files WHERE sipUUID = '%s' AND currentLocation LIKE '%s%%' AND removedTime = 0;""" % (
                SIPUUID, path.replace("%", "\\%"))
            return bool(databaseInterface.queryAllSQL(sql))
        except Exception as inst:
            # Best effort: report the failure and treat the file as not
            # manually normalized.
            sys.stdout.write("DEBUG EXCEPTION!\n")
            traceback.print_exc(file=sys.stdout)
            sys.stderr.write("%s %s\n" % (type(inst), inst.args))
            return False
    def __init__(self, jobChainLink, pk, unit):
        """Create and run one transcoder task per matching command relationship.

        jobChainLink -- the chain link this manager reports to; its passVar
            (when it is a replacementDic) is applied to every option value.
        pk -- primary key of the CommandRelationships row to run.
        unit -- the unit being processed; supplies the replacement
            dictionaries used to expand the %placeholder% option values.

        When the CommandRelationships query returns no rows, the link is
        completed immediately with the current exit code.
        """
        global outputLock
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False

        opts = {
            "inputFile": "%relativeLocation%",
            "fileUUID": "%fileUUID%",
            'commandClassifications': '%commandClassifications%',
            "taskUUID": "%taskUUID%",
            "objectsDirectory": "%SIPObjectsDirectory%",
            "logsDirectory": "%SIPLogsDirectory%",
            "sipUUID": "%SIPUUID%",
            "sipPath": "%SIPDirectory%",
            "fileGrpUse": "%fileGrpUse%",
            "normalizeFileGrpUse": "%normalizeFileGrpUse%",
            "excludeDirectory": "%excludeDirectory%",
            "standardErrorFile": "%standardErrorFile%",
            "standardOutputFile": "%standardOutputFile%"
        }

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
        passVar = self.jobChainLink.passVar
        applyPassVar = passVar is not None and isinstance(
            passVar, replacementDic)

        # Expand every option value: passVar first, then the unit's own
        # replacements, then the replacements for the unit's current path.
        for optsKey in list(opts):
            optValue = opts[optsKey]
            if applyPassVar:
                optValue = passVar.replace(optValue)[0]

            # NOTE(review): fetched once per option as in the original; it is
            # probably safe to hoist if getReplacementDic() has no side
            # effects -- confirm.
            commandReplacementDic = unit.getReplacementDic()
            for key, value in commandReplacementDic.iteritems():
                optValue = optValue.replace(key, value)

            for key, value in SIPReplacementDic.iteritems():
                optValue = optValue.replace(key, value)

            opts[optsKey] = optValue

        commandReplacementDic = unit.getReplacementDic()
        sql = """SELECT CommandRelationships.pk FROM CommandRelationships JOIN Commands ON CommandRelationships.command = Commands.pk WHERE CommandRelationships.pk = '%s';""" % (
            str(pk))
        rows = databaseInterface.queryAllSQL(sql)
        if not rows:
            # Nothing to run for this relationship.
            self.jobChainLink.linkProcessingComplete(self.exitCode)
            return

        tasksList = []
        # The context manager guarantees the lock is released even if task
        # creation or logging raises.
        with self.tasksLock:
            for row in rows:
                UUID = str(uuid.uuid4())
                opts["taskUUID"] = UUID
                opts["CommandRelationship"] = str(pk)
                execute = "transcoder_cr%s" % (pk)
                # NOTE(review): the return value is discarded, so this only
                # matters if deUnicode has side effects -- confirm whether
                # ``execute = deUnicode(execute)`` was intended.
                deUnicode(execute)
                arguments = str(row)
                standardOutputFile = opts["standardOutputFile"]
                standardErrorFile = opts["standardErrorFile"]
                self.standardOutputFile = standardOutputFile
                self.standardErrorFile = standardErrorFile
                self.execute = execute
                self.arguments = arguments
                task = taskStandard(self,
                                    execute,
                                    opts,
                                    standardOutputFile,
                                    standardErrorFile,
                                    outputLock=outputLock,
                                    UUID=UUID)
                self.tasks[UUID] = task
                databaseFunctions.logTaskCreatedSQL(self,
                                                    commandReplacementDic,
                                                    UUID, arguments)
                tasksList.append(task)

        # Tasks are started only after the lock has been released.
        for task in tasksList:
            task.performTask()
Ejemplo n.º 60
0
    def __init__(self, jobChainLink, pk, unit):
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False
        sql = """SELECT * FROM StandardTasksConfigs where pk = '%s'""" % (
            pk.__str__())
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            filterFileEnd = deUnicode(row[1])
            filterFileStart = deUnicode(row[2])
            filterSubDir = deUnicode(row[3])
            requiresOutputLock = row[4]
            self.standardOutputFile = deUnicode(row[5])
            self.standardErrorFile = deUnicode(row[6])
            self.execute = deUnicode(row[7])
            self.arguments = deUnicode(row[8])
            row = c.fetchone()
        sqlLock.release()
        if requiresOutputLock:
            outputLock = threading.Lock()
        else:
            outputLock = None

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)

        SIPUUID = unit.owningUnit.UUID
        sql = """SELECT variableValue FROM UnitVariables WHERE unitType = 'SIP' AND variable = 'normalizationFileIdentificationToolIdentifierTypes' AND unitUUID = '%s';""" % (
            SIPUUID)
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows):
            fileIdentificationRestriction = rows[0][0]
        else:
            fileIdentificationRestriction = None

        self.tasksLock.acquire()
        for file, fileUnit in unit.fileList.items():
            #print "file:", file, fileUnit
            if filterFileEnd:
                if not file.endswith(filterFileEnd):
                    continue
            if filterFileStart:
                if not os.path.basename(file).startswith(filterFileStart):
                    continue
            if filterSubDir:
                #print "file", file, type(file)
                #print unit.pathString, type(unit.pathString)
                #filterSubDir = filterSubDir.encode('utf-8')
                #print filterSubDir, type(filterSubDir)

                if not file.startswith(unit.pathString + filterSubDir):
                    print "skipping file", file, filterSubDir, " :   \t Doesn't start with: ", unit.pathString + filterSubDir
                    continue

            standardOutputFile = self.standardOutputFile
            standardErrorFile = self.standardErrorFile
            execute = self.execute
            arguments = self.arguments

            if self.jobChainLink.passVar != None:
                if isinstance(self.jobChainLink.passVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(
                        execute, arguments, standardOutputFile,
                        standardErrorFile)

            fileUUID = unit.UUID
            ComandClassification = self.execute
            #passVar=self.jobChainLink.passVar
            toPassVar = eval(arguments)
            toPassVar.update({
                "%standardErrorFile%": standardErrorFile,
                "%standardOutputFile%": standardOutputFile,
                '%commandClassifications%': ComandClassification
            })
            #print "debug", toPassVar, toPassVar['%normalizeFileGrpUse%'], unit.fileGrpUse
            passVar = replacementDic(toPassVar)
            if toPassVar[
                    '%normalizeFileGrpUse%'] != unit.fileGrpUse or self.alreadyNormalizedManually(
                        unit, ComandClassification):
                #print "debug: ", unit.currentPath, unit.fileGrpUse
                self.jobChainLink.linkProcessingComplete(
                    self.exitCode, passVar=self.jobChainLink.passVar)
            else:
                taskType = databaseInterface.queryAllSQL(
                    "SELECT pk FROM TaskTypes WHERE description = '%s';" %
                    ("Transcoder task type"))[0][0]
                if fileIdentificationRestriction:
                    sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, CommandRelationships.command FROM FilesIdentifiedIDs JOIN FileIDs ON FilesIdentifiedIDs.fileID = FileIDs.pk JOIN FileIDTypes ON FileIDs.fileIDType = FileIDTypes.pk JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND (%s) AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE AND FileIDTypes.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % (
                        taskType, fileUUID, ComandClassification,
                        fileIdentificationRestriction)
                else:
                    sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, CommandRelationships.command FROM FilesIdentifiedIDs JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % (
                        taskType, fileUUID, ComandClassification)
                rows = databaseInterface.queryAllSQL(sql)

                commandsRun = {}
                if rows and len(rows):
                    for row in rows:
                        microServiceChainLink, commandRelationship, command = row
                        if command in commandsRun:
                            link = commandsRun[command]
                            sql = """SELECT exitCode FROM Tasks JOIN Jobs ON Jobs.jobUUID = Tasks.jobUUID WHERE Tasks.jobUUID IN (SELECT jobUUID FROM Jobs WHERE subJobOf = '%s') AND Jobs.MicroServiceChainLinksPK = '%s';""" % (
                                self.jobChainLink.UUID, link)
                            rows = databaseInterface.queryAllSQL(sql)
                            if len(rows) != 1:
                                print sys.stderr, "Bad query:", sql
                            for row in rows:
                                ret = row[0]
                                sql = "UPDATE CommandRelationships SET countAttempts=countAttempts+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                                if ret:
                                    column = "countNotOK"
                                else:
                                    column = "countOK"
                                sql = "UPDATE CommandRelationships SET " + column + "=" + column + "+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                        else:
                            commandsRun[command] = microServiceChainLink
                            jobChainLink.jobChain.nextChainLink(
                                row[0],
                                passVar=passVar,
                                incrementLinkSplit=True,
                                subJobOf=self.jobChainLink.UUID)
                else:
                    sql = """SELECT MicroserviceChainLink, CommandRelationships.pk, CommandRelationships.command FROM DefaultCommandsForClassifications JOIN MicroServiceChainLinks ON MicroServiceChainLinks.pk = DefaultCommandsForClassifications.MicroserviceChainLink  JOIN TasksConfigs ON TasksConfigs.pk = MicroServiceChainLinks.currentTask  JOIN CommandRelationships ON CommandRelationships.pk = TasksConfigs.taskTypePKReference JOIN CommandClassifications ON CommandClassifications.pk = DefaultCommandsForClassifications.forClassification WHERE TasksConfigs.taskType = '5e70152a-9c5b-4c17-b823-c9298c546eeb' AND CommandClassifications.classification = '%s' AND DefaultCommandsForClassifications.enabled = TRUE;""" % (
                        ComandClassification)
                    rows = databaseInterface.queryAllSQL(sql)

                    for row in rows:
                        microServiceChainLink, commandRelationship, command = row

                        if command in commandsRun:
                            link = commandsRun[command]
                            sql = """SELECT exitCode FROM Tasks JOIN Jobs ON Jobs.jobUUID = Tasks.jobUUID WHERE Tasks.jobUUID IN (SELECT jobUUID FROM Jobs WHERE subJobOf = '%s') AND Jobs.MicroServiceChainLinksPK = '%s';""" % (
                                self.jobChainLink.UUID, link)
                            rows = databaseInterface.queryAllSQL(sql)
                            if len(rows) != 1:
                                print sys.stderr, "Bad query:", sql
                            for row in rows:
                                ret = row[0]
                                sql = "UPDATE CommandRelationships SET countAttempts=countAttempts+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                                if ret:
                                    column = "countNotOK"
                                else:
                                    column = "countOK"
                                sql = "UPDATE CommandRelationships SET " + column + "=" + column + "+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                        else:
                            jobChainLink.jobChain.nextChainLink(
                                microServiceChainLink,
                                passVar=passVar,
                                incrementLinkSplit=True,
                                subJobOf=self.jobChainLink.UUID)
                            commandsRun[command] = microServiceChainLink
                self.jobChainLink.linkProcessingComplete(
                    self.exitCode, passVar=self.jobChainLink.passVar)