def bridgeWatchedDirectories(): "" global allLinks sql = "SELECT watchedDirectoryPath, startingLink FROM WatchedDirectories Join MicroServiceChains ON WatchedDirectories.chain = MicroServiceChains.pk;" rows = databaseInterface.queryAllSQL(sql) for row in rows: countOfSources = 0 watchedDirectoryPath, startingLink = row sql = "SELECT MicroServiceChainLinks.pk FROM StandardTasksConfigs JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = StandardTasksConfigs.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE ( execute LIKE 'moveSIP%%' OR execute LIKE 'moveTransfer%%') AND taskType = '36b2e239-4a57-4aa5-8ebc-7a29139baca6' AND arguments like '%%%s%%';" % (watchedDirectoryPath.replace('%', '\%')) rows2 = databaseInterface.queryAllSQL(sql) for row2 in rows2: microServiceChainLink = row2[0] addArrow(microServiceChainLink, startingLink, color="yellow") countOfSources +=1 sql = "SELECT MicroServiceChainLinks.pk FROM StandardTasksConfigs JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = StandardTasksConfigs.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE ( execute LIKE 'moveSIP%%' OR execute LIKE 'moveTransfer%%') AND taskType = '36b2e239-4a57-4aa5-8ebc-7a29139baca6' AND arguments like '%%%s%%';" % (watchedDirectoryPath.replace('%watchDirectoryPath%', '%sharedPath%watchedDirectories/', 1).replace('%', '\%')) rows2 = databaseInterface.queryAllSQL(sql) for row2 in rows2: microServiceChainLink = row2[0] addArrow(microServiceChainLink, startingLink, color="yellow") countOfSources +=1 if countOfSources == 0: print "no sources for watched directory: ", watchedDirectoryPath return
def index_transfer_files(conn, uuid, pathToTransfer, index, type):
    """Index every regular file found under ``pathToTransfer``.

    One document per file is passed to ``conn.index(document, index, type)``.
    Each document carries the file path and name, the file UUID looked up in
    the Files table (empty string when no row matches), the transfer UUID
    (stored under the ``sipuuid`` key), the transfer's accession ID, the
    dashboard UUID, the ingest date and the creation timestamp. A lower-cased
    ``fileExtension`` key is added when the file name has an extension.

    Returns the number of files indexed.
    """
    filesIndexed = 0
    ingest_date = str(datetime.datetime.today())[0:10]
    create_time = time.time()
    escaped_uuid = MySQLdb.escape_string(uuid)

    # get accessionId from transfers table using UUID
    accession_id = ''
    sql = "SELECT accessionID from Transfers WHERE transferUUID='" + escaped_uuid + "'"
    rows = databaseInterface.queryAllSQL(sql)
    if len(rows) > 0:
        accession_id = rows[0][0]

    # The dashboard UUID does not depend on the file, so fetch it once
    # instead of once per indexed file.
    origin = getDashboardUUID()

    for filepath in list_files_in_dir(pathToTransfer):
        if not os.path.isfile(filepath):
            continue

        # Files rows store the location relative to the transfer directory
        relative_path = '%transferDirectory%objects' + filepath.replace(pathToTransfer, '')
        sql = "SELECT fileUUID FROM Files WHERE currentLocation='" + MySQLdb.escape_string(relative_path) + "' AND transferUUID='" + escaped_uuid + "'"
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows) > 0:
            file_uuid = rows[0][0]
        else:
            file_uuid = ''

        indexData = {
            'filepath': filepath,
            'filename': os.path.basename(filepath),
            'fileuuid': file_uuid,
            'sipuuid': uuid,
            'accessionid': accession_id,
            'status': '',
            'origin': origin,
            'ingestdate': ingest_date,
            'created': create_time,
        }

        fileExtension = os.path.splitext(filepath)[1]
        if fileExtension != '':
            # drop the leading dot
            indexData['fileExtension'] = fileExtension[1:].lower()

        conn.index(indexData, index, type)
        filesIndexed = filesIndexed + 1

    return filesIndexed
def getstatuteInformation(pk, parent):
    """Append a ``statuteInformation`` element to ``parent`` per statute row.

    Rows are read from RightsStatementStatuteInformation for rights statement
    ``pk``. Each element receives the jurisdiction, citation and formatted
    determination date, then any notes and documentation identifiers, and
    finally a ``statuteApplicableDates`` element when a start date, an end
    date or the open-ended flag is set.
    """
    statuteSql = "SELECT pk, statuteJurisdiction, statuteCitation, statuteInformationDeterminationDate, statuteapplicablestartdate, statuteapplicableenddate, statuteApplicableEndDateOpen FROM RightsStatementStatuteInformation WHERE fkRightsStatement = %d" % (pk)
    for statuteRow in databaseInterface.queryAllSQL(statuteSql):
        statutePK = statuteRow[0]
        info = etree.SubElement(parent, "statuteInformation")
        etree.SubElement(info, "statuteJurisdiction").text = statuteRow[1]
        etree.SubElement(info, "statuteCitation").text = statuteRow[2]
        etree.SubElement(info, "statuteInformationDeterminationDate").text = formatDate(statuteRow[3])

        # statuteNote Repeatable
        noteSql = "SELECT statuteNote FROM RightsStatementStatuteInformationNote WHERE fkRightsStatementStatuteInformation = %d;" % (statutePK)
        for noteRow in databaseInterface.queryAllSQL(noteSql):
            etree.SubElement(info, "statuteNote").text = noteRow[0]

        identifierSql = """SELECT statuteDocumentationIdentifierType, statuteDocumentationIdentifierValue, statuteDocumentationIdentifierRole FROM RightsStatementStatuteDocumentationIdentifier WHERE fkRightsStatementStatuteInformation = %s """ % (statutePK)
        for identifierRow in databaseInterface.queryAllSQL(identifierSql):
            identifier = etree.SubElement(info, "statuteDocumentationIdentifier")
            etree.SubElement(identifier, "statuteDocumentationIdentifierType").text = identifierRow[0]
            etree.SubElement(identifier, "statuteDocumentationIdentifierValue").text = identifierRow[1]
            etree.SubElement(identifier, "statuteDocumentationRole").text = identifierRow[2]

        startDate = statuteRow[4]
        endDate = statuteRow[5]
        endDateOpen = statuteRow[6]
        if not (startDate or endDate or endDateOpen):
            continue

        applicableDates = etree.SubElement(info, "statuteApplicableDates")
        if startDate:
            etree.SubElement(applicableDates, "startDate").text = formatDate(startDate)
        # An open-ended statute takes precedence over a concrete end date
        if endDateOpen:
            etree.SubElement(applicableDates, "endDate").text = "OPEN"
        elif endDate:
            etree.SubElement(applicableDates, "endDate").text = formatDate(endDate)
def autoUpdateFPR(self): self.maxLastUpdate = self.getMaxLastUpdate() maxLastUpdateAtStart = self.maxLastUpdate databaseInterface.runSQL("SET foreign_key_checks = 0;") for x in [ ("CommandRelationships", self.fprserver + "/fpr/api/v1/CommandRelationship/"), ("FileIDsBySingleID", self.fprserver + "/fpr/api/v1/FileIDsBySingleID/"), ("FileIDs", self.fprserver + "/fpr/api/v1/FileID/"), ("Commands", self.fprserver + "/fpr/api/v1/Command/"), ("CommandTypes", self.fprserver + "/fpr/api/v1/CommandType/"), ("CommandClassifications", self.fprserver + "/fpr/api/v1/CommandClassification/"), ("FileIDTypes", self.fprserver + "/fpr/api/v1/FileIDType/"), ]: table, url = x # params = {"format":"json", "order_by":"lastmodified", "lastmodified__gte":maxLastUpdateAtStart, "limit":"0"} params = { "format": "json", "order_by": "lastmodified", "lastmodified__gte": maxLastUpdateAtStart, "limit": "0", } entries = getFromRestAPI(url, params, verbose=False, auth=None) # print "test", entries for entry in entries: # print table, entry # check if it already exists sql = """SELECT pk FROM %s WHERE pk = '%s'""" % (table, entry["uuid"]) if databaseInterface.queryAllSQL(sql): # pass continue if not "replaces" in entry: print >> sys.stderr, "Required entry 'replaces' missing." print entry # continue exit(3) # If updating a disabled entry, it will continue to be disabled. if entry["replaces"] != None: sql = """SELECT enabled FROM %s WHERE pk = '%s';""" % (table, entry["replaces"]) enabled = databaseInterface.queryAllSQL(sql)[0][0] if not enabled: entry["enabled"] = 0 sql = """UPDATE %s SET enabled=FALSE WHERE pk = '%s';""" % (table, entry["replaces"]) databaseInterface.runSQL(sql) self.create(table, entry) addLinks() databaseInterface.runSQL("SET foreign_key_checks = 1;") if self.maxLastUpdate != maxLastUpdateAtStart: self.setMaxLastUpdate(self.maxLastUpdate)
def getAgentForFileUUID(fileUUID):
    """Return the ``activeAgent`` unit variable for the file's SIP or transfer.

    The SIP owning the file is consulted first. The transfer is consulted
    only when the file has a transfer and no (non-empty) agent was found for
    the SIP. Returns None when neither lookup yields a row.
    """
    fileRows = databaseInterface.queryAllSQL("""SELECT sipUUID, transferUUID FROM Files WHERE fileUUID = '%s';""" % (fileUUID))
    sipUUID, transferUUID = fileRows[0]

    agent = None
    for unitType, unitUUID in (("SIP", sipUUID), ("Transfer", transferUUID)):
        if agent or not unitUUID:
            # agent already found, or the file has no unit of this type
            continue
        variableRows = databaseInterface.queryAllSQL("""SELECT variableValue FROM UnitVariables WHERE unitType = '%s' AND unitUUID = '%s' AND variable = '%s';""" % (unitType, unitUUID, "activeAgent"))
        if len(variableRows):
            # The row itself serves as the format arguments
            agent = "%s" % (variableRows[0])
    return agent
def parseIdsSimple(FITS_XML, fileUUID): #simpleIdPlaces = [(table, tool, iter)] simpleIdPlaces = [ ("FileIDsByFitsDROIDMimeType", "Droid", "{http://www.nationalarchives.gov.uk/pronom/FileCollection}MimeType"), ("FITS DROID PUID", "Droid", "{http://www.nationalarchives.gov.uk/pronom/FileCollection}PUID"), ("FileIDsByFitsFfidentMimetype", "ffident", "mimetype"), ("FileIDsByFitsFileUtilityMimetype", "file utility", "mimetype"), ("FileIDsByFitsFileUtilityFormat", "file utility", "format"), ("FileIDsByFitsJhoveMimeType", "Jhove", "{}mimeType"), ("FileIDsByFitsJhoveFormat", "Jhove", "{}format") ] for toolKey, tool, iterOn in simpleIdPlaces: identified = [] fileIDs = [] for element in FITS_XML.iter("{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}tool"): if element.get("name") == tool: toolVersion = element.get("version") for element2 in element.getiterator(iterOn): if element2.text != None: if element2.text in identified: continue identified.append(element2.text) sql = """SELECT fileID FROM FileIDsBySingleID WHERE tool = '%s' AND toolVersion='%s' AND id = '%s' AND FileIDsBySingleID.enabled = TRUE;""" % (toolKey, toolVersion, element2.text) fileIDS = databaseInterface.queryAllSQL(sql) if not fileIDS: print "No Archivematica entry found for:", toolKey, toolVersion, element2.text for fileID in fileIDS: sql = """INSERT INTO FilesIdentifiedIDs (fileUUID, fileID) VALUES ('%s', '%s');""" % (fileUUID, fileID[0]) databaseInterface.runSQL(sql) if fileIDs == [] and False: print >>sys.stderr, "No archivematica id for: ", tool, iterOn, element2.text for element in FITS_XML.findall(".//{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}identity[@mimetype]"): format = element.get("mimetype") if format: sql = """SELECT FileIDsBySingleID.fileID, FileIDs.fileIDType, FileIDsBySingleID.id FROM FileIDsBySingleID JOIN FileIDs ON FileIDsBySingleID.fileID = FileIDs.pk WHERE FileIDs.fileIDType = 'c26227f7-fca8-4d98-9d8e-cfab86a2dd0a' AND FileIDsBySingleID.id = '%s' AND 
FileIDsBySingleID.enabled = TRUE AND FileIDs.enabled = TRUE;""" % (format) fileIDS = databaseInterface.queryAllSQL(sql) for fileID in fileIDS: sql = """INSERT INTO FilesIdentifiedIDs (fileUUID, fileID) VALUES ('%s', '%s');""" % (fileUUID, fileID[0]) databaseInterface.runSQL(sql) for element in FITS_XML.findall(".//{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}identity[@format]"): format = element.get("format") if format: sql = """SELECT FileIDsBySingleID.fileID, FileIDs.fileIDType, FileIDsBySingleID.id FROM FileIDsBySingleID JOIN FileIDs ON FileIDsBySingleID.fileID = FileIDs.pk WHERE FileIDs.fileIDType = 'b0bcccfb-04bc-4daa-a13c-77c23c2bda85' AND FileIDsBySingleID.id = '%s' AND FileIDsBySingleID.enabled = TRUE AND FileIDs.enabled = TRUE;""" % (format) fileIDS = databaseInterface.queryAllSQL(sql) for fileID in fileIDS: sql = """INSERT INTO FilesIdentifiedIDs (fileUUID, fileID) VALUES ('%s', '%s');""" % (fileUUID, fileID[0]) databaseInterface.runSQL(sql)
def addLinks():
    """Rebuild the micro-service chain links used by the Transcoder from the FPR.

    Every command relationship that has no TasksConfigs row yet gets a new
    task config, a chain link in the 'Normalize' group (pointing at the
    default link for its classification, when one is configured) and an exit
    code row for exit code 0. Relationships classified as "Skipable" get
    nothing. A relationship with an unknown classification is reported on
    stderr and skipped.
    """
    # Find all command relationships without links.
    CommandClassifications = {
        "3141bc6f-7f77-4809-9244-116b235e7330": "Normalize access",
        "3d1b570f-f500-4b3c-bbbc-4c58aad05c27": "Normalize preservation",
        "27c2969b-b6a0-441d-888d-85292b692064": "Normalize thumbnail",
        "5934dd0b-9f7c-4091-8607-47f519f5c095": "Skipable",
    }
    sql = "SELECT CommandRelationships.pk, commandClassification FROM CommandRelationships WHERE CommandRelationships.pk NOT IN (SELECT taskTypePKReference FROM TasksConfigs WHERE taskType = '5e70152a-9c5b-4c17-b823-c9298c546eeb');"
    rows = databaseInterface.queryAllSQL(sql)
    for cr, cc in rows:
        if cc not in CommandClassifications:
            print >>sys.stderr, "Invalid Command Classification (%s) for Command Relationship: %s" % (cc, cr)
            # No description can be derived for an unknown classification;
            # skip it rather than fail on the lookup below.
            continue

        taskConfigDescription = CommandClassifications[cc]
        if taskConfigDescription == "Skipable":
            continue

        # create new taskConfig
        taskConfigPK = str(uuid.uuid4())
        sql = """INSERT INTO TasksConfigs SET pk='%s', taskType='5e70152a-9c5b-4c17-b823-c9298c546eeb', taskTypePKReference='%s', description='%s'""" % (taskConfigPK, cr, taskConfigDescription)
        databaseInterface.runSQL(sql)

        # create new link
        linkPK = str(uuid.uuid4())
        # find default
        sql = """SELECT MicroserviceChainLink FROM DefaultCommandsForClassifications WHERE forClassification = '%s';""" % (cc)
        rows2 = databaseInterface.queryAllSQL(sql)
        if not rows2:
            # unquoted so that it becomes SQL NULL
            linkDefaultNextLink = "NULL"
        else:
            linkDefaultNextLink = "'%s'" % (rows2[0][0])
        sql = """INSERT INTO MicroServiceChainLinks SET pk = '%s', currentTask='%s', defaultNextChainLink = %s, microserviceGroup='Normalize';""" % (linkPK, taskConfigPK, linkDefaultNextLink)
        databaseInterface.runSQL(sql)

        # Create Exit Code
        exitCodesPK = str(uuid.uuid4())
        sql = """INSERT INTO MicroServiceChainLinksExitCodes SET pk = '%s', microServiceChainLink = '%s', exitCode = 0, nextMicroServiceChainLink = NULL;""" % (exitCodesPK, linkPK)
        databaseInterface.runSQL(sql)
def archivematicaCreateMETSRightsDspaceMDRef(fileUUID, filePath, transferUUID, itemdirectoryPath): ret = [] try: print fileUUID, filePath # find the mets file sql = ( "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation = '%%SIPDirectory%%%s/mets.xml' AND transferUUID = '%s';" % (os.path.dirname(filePath), transferUUID) ) rows = databaseInterface.queryAllSQL(sql) for row in rows: metsFileUUID = row[0] metsLoc = row[1].replace("%SIPDirectory%", "", 1) metsLocation = os.path.join(os.path.dirname(itemdirectoryPath), "mets.xml") LABEL = "mets.xml-%s" % (metsFileUUID) ret.append(createMDRefDMDSec(LABEL, metsLocation, metsLoc)) base = os.path.dirname(os.path.dirname(itemdirectoryPath)) base2 = os.path.dirname(os.path.dirname(filePath)) for dir in os.listdir(base): fullDir = os.path.join(base, dir) fullDir2 = os.path.join(base2, dir) print fullDir if dir.startswith("ITEM"): print "continue" continue if not os.path.isdir(fullDir): continue sql = ( "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation = '%%SIPDirectory%%%s/mets.xml' AND transferUUID = '%s';" % (fullDir2, transferUUID) ) print sql rows = databaseInterface.queryAllSQL(sql) for row in rows: print row metsFileUUID = row[0] metsLoc = row[1].replace("%SIPDirectory%", "", 1) metsLocation = os.path.join(fullDir, "mets.xml") print metsLocation LABEL = "mets.xml-%s" % (metsFileUUID) ret.append(createMDRefDMDSec(LABEL, metsLocation, metsLoc)) except Exception as inst: print >> sys.stderr, "Error creating mets dspace mdref", fileUUID, filePath print >> sys.stderr, type(inst), inst.args sharedVariablesAcrossModules.globalErrorCount += 1 return ret
def getrightsGranted(pk, parent):
    """Append a ``rightsGranted`` element to ``parent`` per rights-granted row.

    For each row of rights statement ``pk`` the act is written, followed by
    one ``restriction`` element per restriction row. When a start date, end
    date or open-ended flag is present, the dates are written under
    ``termOfGrant`` (restriction "allow") or ``termOfRestriction``
    ("disallow"/"conditional"); with any other restriction an error is
    counted and the rest of that row, including its notes, is skipped.
    Finally the rights-granted notes are appended.

    Invalid restriction values are reported on stderr and counted in the
    global error count.
    """
    sql = "SELECT RightsStatementRightsGranted.pk, act, startDate, endDate, endDateOpen FROM RightsStatementRightsGranted WHERE fkRightsStatement = %d" % (pk)
    rows = databaseInterface.queryAllSQL(sql)
    for row in rows:
        rightsGranted = etree.SubElement(parent, "rightsGranted")
        etree.SubElement(rightsGranted, "act").text = row[1]

        # Stays "Undefined" when the row has no restriction at all; otherwise
        # ends up as the last restriction read.
        restriction = "Undefined"
        sql = """SELECT restriction FROM RightsStatementRightsGrantedRestriction WHERE RightsStatementRightsGrantedRestriction.fkRightsStatementRightsGranted = %s """ % (row[0])
        rows2 = databaseInterface.queryAllSQL(sql)
        for row2 in rows2:
            restriction = row2[0]
            if restriction.lower() not in ["disallow", "conditional", "allow"]:
                print >>sys.stderr, "The value of element restriction must be: 'Allow', 'Disallow', or 'Conditional':", restriction
                sharedVariablesAcrossModules.globalErrorCount += 1
            etree.SubElement(rightsGranted, "restriction").text = restriction

        if row[2] or row[3] or row[4]:
            if restriction.lower() in ["allow"]:
                termOfGrant = etree.SubElement(rightsGranted, "termOfGrant")
            elif restriction.lower() in ["disallow", "conditional"]:
                termOfGrant = etree.SubElement(rightsGranted, "termOfRestriction")
            else:
                print >>sys.stderr, "The value of element restriction must be: 'Allow', 'Disallow', or 'Conditional'"
                sharedVariablesAcrossModules.globalErrorCount += 1
                continue

            if row[2]:
                etree.SubElement(termOfGrant, "startDate").text = formatDate(row[2])
            # An open-ended term takes precedence over a concrete end date
            if row[4]:
                etree.SubElement(termOfGrant, "endDate").text = "OPEN"
            elif row[3]:
                etree.SubElement(termOfGrant, "endDate").text = formatDate(row[3])

        # 4.1.6.4 rightsGrantedNote (O, R)
        sql = "SELECT rightsGrantedNote FROM RightsStatementRightsGrantedNote WHERE fkRightsStatementRightsGranted = %d;" % (row[0])
        rows2 = databaseInterface.queryAllSQL(sql)
        for row2 in rows2:
            etree.SubElement(rightsGranted, "rightsGrantedNote").text = row2[0]
def alreadyNormalizedManually(self, unit, ComandClassification):
    """Tell whether a manually normalized version of ``unit``'s file is registered.

    The file's path, minus its last extension, is mapped into the SIP's
    ``objects/manualNormalization/preservation/`` or ``.../access/``
    directory (chosen by ``ComandClassification``), and the Files table is
    searched for a non-removed file of the same SIP whose location starts
    with that path.

    Returns True when such a file exists and False otherwise. False is also
    returned for any other classification, and when an error occurs (the
    error is printed to stdout).
    """
    try:
        SIPUUID = unit.owningUnit.UUID
        filePath = unit.currentPath
        bname = os.path.basename(filePath)
        dirName = os.path.dirname(filePath)
        # drop everything from the last "." on
        i = bname.rfind(".")
        if i != -1:
            bname = bname[:i]
        path = os.path.join(dirName, bname)
        if ComandClassification == "preservation":
            path = path.replace("%SIPDirectory%objects/", "%SIPDirectory%objects/manualNormalization/preservation/")
        elif ComandClassification == "access":
            path = path.replace("%SIPDirectory%objects/", "%SIPDirectory%objects/manualNormalization/access/")
        else:
            return False

        # '%' is escaped so it is matched literally by LIKE
        sql = """SELECT fileUUID FROM Files WHERE sipUUID = '%s' AND currentLocation LIKE '%s%%' AND removedTime = 0;""" % (SIPUUID, path.replace("%", "\\%"))
        return bool(databaseInterface.queryAllSQL(sql))
    except Exception as inst:
        print("DEBUG EXCEPTION!")
        traceback.print_exc(file=sys.stdout)
        print(type(inst))  # the exception instance
        print(inst.args)
        # Report "not normalized" explicitly instead of falling off the end
        return False
def getmicroServiceChainLink(self, variable, variableValue, defaultMicroServiceChainLink):
    """Return the chain link stored for ``variable`` on this unit.

    Falls back to ``defaultMicroServiceChainLink`` when the unit has no
    UnitVariables row for ``variable``. ``variableValue`` is not used.
    """
    query = """SELECT pk, microServiceChainLink FROM UnitVariables WHERE unitType = '%s' AND unitUUID = '%s' AND variable = '%s';""" % (self.unitType, self.UUID, variable)
    matches = databaseInterface.queryAllSQL(query)
    if not len(matches):
        return defaultMicroServiceChainLink
    # first matching row wins
    return matches[0][1]
def getFileUUIDofSourceFile(transferUUID, sourceFilePath):
    """Return the UUID of the first non-removed file of the transfer whose
    current location starts with ``sourceFilePath``, or "" when none matches.
    """
    # '%' is escaped so it is matched literally by LIKE
    locationPrefix = sourceFilePath.replace('%', '\\%')
    query = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND transferUUID = '%s' AND currentLocation LIKE '%s%%';""" % (transferUUID, locationPrefix)
    matches = databaseInterface.queryAllSQL(query)
    if not len(matches):
        return ""
    return matches[0][0]
def findExistingFileID(ext):
    """Look up the FileIDs row described as 'A <ext> file'.

    Returns the first matching row (pk, validPreservationFormat,
    validAccessFormat), or "" when there is none.
    """
    query = """SELECT pk, validPreservationFormat, validAccessFormat FROM FileIDs where fileIDType = '16ae42ff-1018-4815-aac8-cceacd8d88a8' AND description = '%s';""" % ('A %s file' % (ext))
    matches = databaseInterface.queryAllSQL(query)
    if len(matches):
        return matches[0]
    return ""
def xmlCreateFileAssociationBetween(originalFileFullPath, outputFromNormalizationFileFullPath, SIPFullPath, sipUUID, eventDetailText, eventOutcomeDetailNote, outputFileUUID=""):
    """Register a normalized file and tie it to the file it was derived from.

    The original file's UUID is looked up from its SIP-relative location.
    The output file is added to the SIP with use "preservation" (under a
    freshly generated UUID unless ``outputFileUUID`` is given), its size and
    checksum are updated, and a "normalization" event plus a derivation
    linking both files are recorded.

    Raises IndexError when the original file has no matching Files row.
    """
    # assign file UUID
    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = str(uuid.uuid4())

    sourceLocation = originalFileFullPath.replace(SIPFullPath, "%SIPDirectory%", 1)
    sql = ("SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '"
           + MySQLdb.escape_string(sourceLocation)
           + "' AND Files.sipUUID = '" + sipUUID + "';")
    print(sql)
    rows = databaseInterface.queryAllSQL(sql)
    print(rows)
    fileUUID = rows[0][0]

    outputLocation = outputFromNormalizationFileFullPath.replace(SIPFullPath, "%SIPDirectory%", 1)
    addFileToSIP(outputLocation, outputFileUUID, sipUUID, str(uuid.uuid4()), date, sourceType="creation", use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath, date, str(uuid.uuid4()))

    # the same UUID identifies the event and the derivation's related event
    taskUUID = str(uuid.uuid4())
    insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=taskUUID,
        eventType="normalization",
        eventDateTime=date,
        eventDetail=eventDetailText,
        eventOutcome="",
        eventOutcomeDetailNote=eventOutcomeDetailNote,
    )
    insertIntoDerivations(sourceFileUUID=fileUUID, derivedFileUUID=outputFileUUID, relatedEventUUID=taskUUID)
def getMaxLastUpdate():
    """Return the stored FPR client ``maxLastUpdate`` value.

    Falls back to "2000-01-01T00:00:00" when no value has been stored yet.
    """
    stored = databaseInterface.queryAllSQL("""SELECT variableValue FROM UnitVariables WHERE unitType = 'FPR' AND unitUUID = 'Client' AND variable = 'maxLastUpdate' """)
    if not stored:
        return "2000-01-01T00:00:00"
    return stored[0][0]
def bridgeUserSelections():
    """Draw a green arrow from each choice link to the starting link of every chain offered there."""
    query = "SELECT MicroServiceChainChoice.choiceAvailableAtLink, MicroServiceChains.startingLink FROM MicroServiceChainChoice JOIN MicroServiceChains ON MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk;"
    for choiceAvailableAtLink, startingLink in databaseInterface.queryAllSQL(query):
        if not (choiceAvailableAtLink and startingLink):
            # nothing to connect when either end is missing
            continue
        addArrow(choiceAvailableAtLink, startingLink, color='green')
def getDashboardUUID():
    """Return the ``dashboard_uuid`` dashboard setting.

    Returns None unless exactly one DashboardSettings row has that name.
    """
    settingName = MySQLdb.escape_string('dashboard_uuid')
    found = databaseInterface.queryAllSQL("SELECT value FROM DashboardSettings WHERE name='%s'" % (settingName))
    if len(found) != 1:
        return None
    return found[0][0]
def getDocumentationIdentifier(pk, parent):
    """Append a ``copyrightDocumentationIdentifier`` element to ``parent`` for
    each documentation identifier row of copyright information ``pk``.
    """
    query = "SELECT pk, copyrightDocumentationIdentifierType, copyrightDocumentationIdentifierValue, copyrightDocumentationIdentifierRole FROM RightsStatementCopyrightDocumentationIdentifier WHERE fkRightsStatementCopyrightInformation = %d" % (pk)
    for identifierRow in databaseInterface.queryAllSQL(query):
        identifier = etree.SubElement(parent, "copyrightDocumentationIdentifier")
        # column 0 is the row's own pk and is not written out
        etree.SubElement(identifier, "copyrightDocumentationIdentifierType").text = identifierRow[1]
        etree.SubElement(identifier, "copyrightDocumentationIdentifierValue").text = identifierRow[2]
        etree.SubElement(identifier, "copyrightDocumentationRole").text = identifierRow[3]
def jobChainTextGet(leadIn, pk, indent=""):
    """Write the text line for micro-service chain ``pk`` and descend into its links.

    For each MicroServiceChains row with that pk, a line made of ``leadIn``
    and the quoted chain label is written with ``writePlant``, then
    ``jobChainLinkTextGet`` is called for the chain's starting link with a
    lead-out that points back at the chain label.
    """
    query = """SELECT startingLink, description FROM MicroServiceChains WHERE pk = '%s';""" % (str(pk))
    for startingLink, description in databaseInterface.queryAllSQL(query):
        chainLabel = description + " MicroServiceChain"
        writePlant('%s "%s"' % (leadIn, chainLabel))
        jobChainLinkTextGet(indent, "-->[" + chainLabel + "]", startingLink)
def getTrimAmdSec(baseDirectoryPath, fileGroupIdentifier):
    """Return a ``digiprovMD`` element holding one ``mdRef`` per TRIM container
    metadata file of the SIP ``fileGroupIdentifier``.

    ``baseDirectoryPath`` is not used.
    """
    digiprovMD = etree.Element("digiprovMD")
    query = "SELECT currentLocation FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse='TRIM container metadata';" % ('sipUUID', fileGroupIdentifier)
    for locationRow in databaseInterface.queryAllSQL(query):
        # reference the file by its path relative to the SIP
        relativeLocation = locationRow[0].replace("%SIPDirectory%", "", 1)
        etree.SubElement(digiprovMD, "mdRef", attrib={
            "LABEL": "ContainerMetadata.xml",
            xlinkBNS + "href": relativeLocation,
            "MDTYPE": "OTHER",
            "OTHERMDTYPE": "CUSTOM",
            'LOCTYPE': "OTHER",
            'OTHERLOCTYPE': "SYSTEM",
        })
    return digiprovMD
def getFileUUIDLike(filePath, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith):
    """Map current locations to file UUIDs for the files matching ``filePath``.

    ``unitPath`` inside ``filePath`` is replaced with ``unitPathReplaceWith``
    and the result is used as a LIKE pattern against the current location of
    the non-removed files whose ``unitIdentifierType`` column equals
    ``unitIdentifier``.

    Returns a dict of {currentLocation: fileUUID}.
    """
    locationPattern = filePath.replace(unitPath, unitPathReplaceWith)
    query = ("SELECT Files.fileUUID, Files.currentLocation FROM Files WHERE removedTime = 0 AND Files.currentLocation LIKE '"
             + MySQLdb.escape_string(locationPattern)
             + "' AND " + unitIdentifierType + " = '" + unitIdentifier + "';")
    matches = databaseInterface.queryAllSQL(query)
    return dict((match[1], match[0]) for match in matches)
def bridgeLoadVariable(): "" sql = "SELECT MicroServiceChainLinks.pk, TasksConfigsUnitVariableLinkPull.variable, TasksConfigsUnitVariableLinkPull.defaultMicroServiceChainLink FROM MicroServiceChainLinks JOIN TasksConfigs ON MicroServiceChainLinks.currentTask = TasksConfigs.pk JOIN TasksConfigsUnitVariableLinkPull ON TasksConfigsUnitVariableLinkPull.pk = TasksConfigs.taskTypePKReference WHERE TasksConfigs.taskType = 'c42184a3-1a7f-4c4d-b380-15d8d97fdd11';" rows = databaseInterface.queryAllSQL(sql) for row in rows: count = 0 microServiceChainLink, variable, defaultMicroServiceChainLink = row sql = "SELECT MicroServiceChainLinks.pk, TasksConfigsSetUnitVariable.variable, TasksConfigsSetUnitVariable.microServiceChainLink FROM MicroServiceChainLinks JOIN TasksConfigs ON MicroServiceChainLinks.currentTask = TasksConfigs.pk JOIN TasksConfigsSetUnitVariable ON TasksConfigsSetUnitVariable.pk = TasksConfigs.taskTypePKReference WHERE TasksConfigs.taskType = '6f0b612c-867f-4dfd-8e43-5b35b7f882d7' AND TasksConfigsSetUnitVariable.variable = '%s';" % (variable) rows2 = databaseInterface.queryAllSQL(sql) for row2 in rows2: microServiceChainLink2, variable, microServiceChainLinkDest = row2 addArrow(microServiceChainLink, microServiceChainLinkDest, color="brown") count +=1 if defaultMicroServiceChainLink: addArrow(microServiceChainLink, defaultMicroServiceChainLink, color="brown") if count == 0: print "no bridge variable set for: ", linkUUIDtoNodeName[microServiceChainLink] return
def __init__(self, jobChainLink, pk, unit):
    """Create and run the transcoder task(s) for command relationship ``pk``.

    The option placeholders are expanded with the job chain link's passed
    variable (when it is a replacementDic) and with the unit's replacement
    dictionaries. One ``taskStandard`` is then created, logged and stored in
    ``self.tasks`` per matching CommandRelationships row, and the tasks are
    performed once all of them exist. When no row matches, the job chain
    link is told that processing is complete with the current exit code.
    """
    global outputLock
    self.tasks = {}
    self.tasksLock = threading.Lock()
    self.pk = pk
    self.jobChainLink = jobChainLink
    self.exitCode = 0
    self.clearToNextLink = False

    opts = {
        "inputFile": "%relativeLocation%",
        "fileUUID": "%fileUUID%",
        'commandClassifications': '%commandClassifications%',
        "taskUUID": "%taskUUID%",
        "objectsDirectory": "%SIPObjectsDirectory%",
        "logsDirectory": "%SIPLogsDirectory%",
        "sipUUID": "%SIPUUID%",
        "sipPath": "%SIPDirectory%",
        "fileGrpUse": "%fileGrpUse%",
        "normalizeFileGrpUse": "%normalizeFileGrpUse%",
        "excludeDirectory": "%excludeDirectory%",
        "standardErrorFile": "%standardErrorFile%",
        "standardOutputFile": "%standardOutputFile%",
    }

    # Expand the placeholders of every option value in place. Only values of
    # existing keys are reassigned, so iterating the dict stays valid.
    SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
    for optsKey, optsValue in opts.iteritems():
        if self.jobChainLink.passVar is not None:
            if isinstance(self.jobChainLink.passVar, replacementDic):
                opts[optsKey] = self.jobChainLink.passVar.replace(opts[optsKey])[0]

        commandReplacementDic = unit.getReplacementDic()
        for key, value in commandReplacementDic.iteritems():
            opts[optsKey] = opts[optsKey].replace(key, value)

        for key, value in SIPReplacementDic.iteritems():
            opts[optsKey] = opts[optsKey].replace(key, value)

    commandReplacementDic = unit.getReplacementDic()
    sql = """SELECT CommandRelationships.pk FROM CommandRelationships JOIN Commands ON CommandRelationships.command = Commands.pk WHERE CommandRelationships.pk = '%s';""" % (pk.__str__())
    rows = databaseInterface.queryAllSQL(sql)
    tasksList = []
    if rows:
        # The lock is held while the tasks are created and registered, and
        # is released even if creating a task raises.
        with self.tasksLock:
            for row in rows:
                UUID = uuid.uuid4().__str__()
                # NOTE(review): the same opts dict is handed to every task,
                # so these two keys are overwritten on each iteration.
                opts["taskUUID"] = UUID
                opts["CommandRelationship"] = pk.__str__()
                execute = "transcoder_cr%s" % (pk)
                # NOTE(review): the return value of deUnicode is discarded
                deUnicode(execute)
                arguments = row.__str__()
                standardOutputFile = opts["standardOutputFile"]
                standardErrorFile = opts["standardErrorFile"]
                self.standardOutputFile = standardOutputFile
                self.standardErrorFile = standardErrorFile
                self.execute = execute
                self.arguments = arguments
                task = taskStandard(self, execute, opts, standardOutputFile, standardErrorFile, outputLock=outputLock, UUID=UUID)
                self.tasks[UUID] = task
                databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic, UUID, arguments)
                tasksList.append(task)

        # run the tasks only after the lock has been released
        for task in tasksList:
            task.performTask()
    else:
        self.jobChainLink.linkProcessingComplete(self.exitCode)
def setMaildirFiles(sipUUID, sipPath):
    """Identify every registered file under the SIP's objects/Maildir as a maildir email file.

    Each file found on disk is looked up in the Files table by its
    SIP-relative location; files without a matching non-removed row are
    skipped.
    """
    maildirRoot = os.path.join(sipPath, "objects", "Maildir")
    for dirPath, subDirs, fileNames in os.walk(maildirRoot):
        for fileName in fileNames:
            fileRelativePath = os.path.join(dirPath, fileName).replace(sipPath, "%SIPDirectory%", 1)
            lookupSql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND sipUUID = '%s' AND currentLocation = '%s';""" % (sipUUID, fileRelativePath)
            matches = databaseInterface.queryAllSQL(lookupSql)
            if not len(matches):
                continue
            insertSql = """INSERT INTO FilesIdentifiedIDs (fileUUID, fileID) VALUES ('%s', (SELECT pk FROM FileIDs WHERE enabled = TRUE AND description = 'A maildir email file')); """ % (matches[0][0])
            databaseInterface.runSQL(insertSql)
def getTrimDmdSec(baseDirectoryPath, fileGroupIdentifier):
    """Build the Dublin Core ``dmdSec`` for a TRIM container.

    Title, provenance, isPartOf and identifier are read from
    ``objects/ContainerMetadata.xml`` under ``baseDirectoryPath``. The extent
    is the number of non-removed 'original' files of the SIP
    ``fileGroupIdentifier``. The date is the "earliest/latest" range of the
    DateModified values found in the SIP's TRIM file metadata files that
    exist on disk; the ``date`` element is left out when no such value was
    found, instead of being written as "None/None".

    Returns the ``dmdSec`` element.
    """
    # containerMetadata
    ret = etree.Element("dmdSec")
    mdWrap = etree.SubElement(ret, "mdWrap")
    mdWrap.set("MDTYPE", "DC")
    xmlData = etree.SubElement(mdWrap, "xmlData")

    dublincore = etree.SubElement(xmlData, "dublincore", attrib=None, nsmap={None: dctermsNS})
    dublincore.set(xsiBNS + "schemaLocation", dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")
    tree = etree.parse(os.path.join(baseDirectoryPath, "objects", "ContainerMetadata.xml"))
    root = tree.getroot()

    etree.SubElement(dublincore, dctermsBNS + "title").text = root.find("Container/TitleFreeTextPart").text
    etree.SubElement(dublincore, dctermsBNS + "provenance").text = "Department: %s; OPR: %s" % (root.find("Container/Department").text, root.find("Container/OPR").text)
    etree.SubElement(dublincore, dctermsBNS + "isPartOf").text = root.find("Container/FullClassificationNumber").text
    # only the last path component of the record number is used
    etree.SubElement(dublincore, dctermsBNS + "identifier").text = root.find("Container/RecordNumber").text.split('/')[-1]

    # get objects count
    sql = "SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse='original';" % ('sipUUID', fileGroupIdentifier)
    rows = databaseInterface.queryAllSQL(sql)
    etree.SubElement(dublincore, dctermsBNS + "extent").text = "%d digital objects" % (len(rows))

    sql = "SELECT currentLocation FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse='TRIM file metadata';" % ('sipUUID', fileGroupIdentifier)
    rows = databaseInterface.queryAllSQL(sql)
    minDateMod = None
    maxDateMod = None
    for row in rows:
        fileMetadataXmlPath = row[0].replace('%SIPDirectory%', baseDirectoryPath, 1)
        if not os.path.isfile(fileMetadataXmlPath):
            continue
        root2 = etree.parse(fileMetadataXmlPath).getroot()
        # the values are compared as strings
        dateMod = root2.find("Document/DateModified").text
        if minDateMod is None or dateMod < minDateMod:
            minDateMod = dateMod
        if maxDateMod is None or dateMod > maxDateMod:
            maxDateMod = dateMod

    # Both bounds are set together, so checking one is enough
    if minDateMod is not None:
        etree.SubElement(dublincore, dctermsBNS + "date").text = "%s/%s" % (minDateMod, maxDateMod)

    return ret
def bridgeExitCodes():
    """Draw an arrow from each chain link to every next link configured in its exit codes."""
    global allLinks
    exitCodeRows = databaseInterface.queryAllSQL("""SELECT microServiceChainLink, nextMicroServiceChainLink FROM MicroServiceChainLinksExitCodes;""")
    for microServiceChainLink, nextMicroServiceChainLink in exitCodeRows:
        if not nextMicroServiceChainLink:
            # exit code without a follow-up link
            continue
        addArrow(microServiceChainLink, nextMicroServiceChainLink)
def printFidoInsert(itemdirectoryPath):
    """Print (and optionally run) the SQL that registers a file's Fido ID.

    Nothing happens when the file has no extension, when no file ID exists
    for that extension, when Fido returns no ID, or when the Fido ID was
    already handled during this run. Otherwise a FileIDs row copying the
    extension-based file ID's preservation/access validity and a
    FileIDsBySingleID row for Fido 1.1.2 are printed, executed when
    ``runSQLInserts`` is set, and the new command relationships are printed.
    """
    global runSQLInserts
    ext = findExtension(itemdirectoryPath).lower()
    if not ext:
        return

    existing = findExistingFileID(ext)
    if not existing:
        return
    fileID, validPreservationFormat, validAccessFormat = existing

    FidoFileID = getFidoID(itemdirectoryPath).strip()
    if not FidoFileID:
        return

    # check for existing rule
    # The lookup result is currently ignored: the early return it used to
    # guard is disabled.
    existingRuleSql = """SELECT pk FROM FileIDs WHERE fileIDType = 'afdbee13-eec5-4182-8c6c-f5638ee290f3' AND description = '%s';""" % FidoFileID
    databaseInterface.queryAllSQL(existingRuleSql)

    if FidoFileID in idsDone:
        return

    fileIDUUID = str(uuid.uuid4())
    sql = """INSERT INTO FileIDs (pk, description, validPreservationFormat, validAccessFormat, fileIDType) VALUES ('%s', '%s', %s, %s, 'afdbee13-eec5-4182-8c6c-f5638ee290f3');""" % (fileIDUUID, FidoFileID, validPreservationFormat, validAccessFormat)
    idsDone.append(FidoFileID)
    print(sql)
    if runSQLInserts:
        databaseInterface.runSQL(sql)

    FileIDsBySingleIDUUID = str(uuid.uuid4())
    sql = """INSERT INTO FileIDsBySingleID (pk, fileID, id, tool, toolVersion) VALUES ('%s', '%s', '%s', 'Fido', '1.1.2');""" % (FileIDsBySingleIDUUID, fileIDUUID, FidoFileID)
    print(sql)
    if runSQLInserts:
        databaseInterface.runSQL(sql)

    printNewCommandRelationships(fileID, fileIDUUID)
    # blank separator line
    print("")
def setMaxLastUpdate(maxLastUpdate):
    """Store ``maxLastUpdate`` as the FPR client's ``maxLastUpdate`` unit variable.

    The existing row is updated when there is one; otherwise a new row is
    inserted. Returns ``maxLastUpdate``.
    """
    existing = databaseInterface.queryAllSQL("""SELECT pk FROM UnitVariables WHERE unitType = 'FPR' AND unitUUID = 'Client' AND variable = 'maxLastUpdate'; """)
    if existing:
        statement = """UPDATE UnitVariables SET variableValue='%s' WHERE unitType = 'FPR' AND unitUUID = 'Client' AND variable = 'maxLastUpdate';""" % (maxLastUpdate)
    else:
        newPK = str(uuid.uuid4())
        statement = """INSERT INTO UnitVariables SET pk='%s', variableValue='%s', unitType='FPR', unitUUID = 'Client', variable = 'maxLastUpdate';""" % (newPK, maxLastUpdate)
    databaseInterface.runSQL(statement)
    return maxLastUpdate
def __init__(self, commandLinker, replacementDic, opts, onSuccess):
    """Look up the command behind a CommandRelationships pk and wrap it.

    commandLinker is stored as self.pk and used as the pk in the lookup.
    The command column of the last returned row becomes self.command, and a
    Command object built from it is stored as self.commandObject.
    """
    self.pk = commandLinker
    self.replacementDic = replacementDic
    self.opts = opts
    self.onSuccess = onSuccess

    query = "SELECT command FROM CommandRelationships where pk = %s;" % (str(self.pk))
    matches = databaseInterface.queryAllSQL(query)
    if matches:
        # When several rows come back, the last one wins.
        self.command = matches[-1][0]

    self.commandObject = Command(str(self.command), replacementDic, self.onSuccess, opts)
def getArchivematicaFileID(FidoFileID, FidoVersion):
    """Return the fileID mapped to a Fido id for the given Fido version.

    When FileIDsBySingleID has no matching row, a message is written to
    stderr and the process exits with status 0.
    """
    query = """SELECT fileID FROM FileIDsBySingleID WHERE tool = 'Fido' AND toolVersion = '%s' AND id='%s';""" % (FidoVersion, FidoFileID)
    matches = databaseInterface.queryAllSQL(query)
    if len(matches) > 0:
        return matches[0][0]
    print >>sys.stderr, "No Archivematica format id for Fido %s: %s" % (FidoVersion, FidoFileID)
    exit(0)
def bridgeMagicChainLinks(): "" #find the assignments sql = "SELECT MicroServiceChainLinks.pk, TasksConfigsAssignMagicLink.execute FROM MicroServiceChainLinks JOIN TasksConfigs ON MicroServiceChainLinks.currentTask = TasksConfigs.pk JOIN TasksConfigsAssignMagicLink ON TasksConfigsAssignMagicLink.pk = TasksConfigs.taskTypePKReference WHERE TasksConfigs.taskType = '3590f73d-5eb0-44a0-91a6-5b2db6655889';" rows = databaseInterface.queryAllSQL(sql) for row in rows: microServiceChainLink, magicLink = row node = G.get_node(linkUUIDtoNodeName[microServiceChainLink]) visitedNodes = {node:None} #prevents looping count = bridgeMagicChainLinksRecursiveAssist(node, magicLink, visitedNodes) if count == 0: print "no loads of set link: ", node return
def xmlCreateFileAssociationBetween(originalFileFullPath, outputFromNormalizationFileFullPath, SIPFullPath, sipUUID, eventDetailText, eventOutcomeDetailNote, outputFileUUID=""): #assign file UUID date = databaseInterface.getUTCDate() if outputFileUUID == "": outputFileUUID = uuid.uuid4().__str__() originalFilePathRelativeToSIP = originalFileFullPath.replace( SIPFullPath, "%SIPDirectory%", 1) sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string( originalFilePathRelativeToSIP ) + "' AND Files.sipUUID = '" + sipUUID + "';" print sql rows = databaseInterface.queryAllSQL(sql) print rows fileUUID = rows[0][0] filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace( SIPFullPath, "%SIPDirectory%", 1) addFileToSIP(filePathRelativeToSIP, outputFileUUID, sipUUID, uuid.uuid4().__str__(), date, sourceType="creation", use="preservation") updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath, date, uuid.uuid4().__str__()) taskUUID = uuid.uuid4().__str__() insertIntoEvents(fileUUID=fileUUID, \ eventIdentifierUUID=taskUUID, \ eventType="normalization", \ eventDateTime=date, \ eventDetail=eventDetailText, \ eventOutcome="", \ eventOutcomeDetailNote=eventOutcomeDetailNote) insertIntoDerivations(sourceFileUUID=fileUUID, derivedFileUUID=outputFileUUID, relatedEventUUID=taskUUID)
def updateFileLocation2(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith): """Dest needs to be the actual full destination path with filename.""" srcDB = src.replace(unitPath, unitPathReplaceWith) dstDB = dst.replace(unitPath, unitPathReplaceWith) sql = "SELECT Files.fileUUID, Files.currentLocation FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(srcDB) + "' AND " + unitIdentifierType + " = '" + unitIdentifier + "';" rows = databaseInterface.queryAllSQL(sql) if len(rows) != 1: print sys.stderr, len(rows), "rows", sql, rows exit(4) for row in rows: fileUUID = row[0] location = row[1] sql = """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(dstDB), fileUUID) databaseInterface.runSQL(sql) print "moving: ", src, dst shutil.move(src, dst)
def connect_and_change_transfer_file_status(uuid, status):
    """Set the 'status' field of the indexed documents of a transfer's files.

    For every fileUUID stored for the transfer, the 'transfers' index is
    searched by 'fileuuid' and the first hit (if any) is updated.
    Returns the number of file rows found in the database.
    """
    # get file UUIDs for each file in the SIP
    query = "SELECT fileUUID from Files WHERE transferUUID='" + MySQLdb.escape_string(uuid) + "'"
    fileRows = databaseInterface.queryAllSQL(query)
    total = len(fileRows)
    if total == 0:
        return total

    conn = connect_and_create_index('transfers')
    # cycle through file UUIDs and update status
    for fileRow in fileRows:
        fieldQuery = pyes.FieldQuery(pyes.FieldParameter('fileuuid', fileRow[0]))
        documents = conn.search_raw(query=fieldQuery)
        if len(documents['hits']['hits']) == 0:
            continue
        document_id = documents['hits']['hits'][0]['_id']
        conn.update({'status': status}, 'transfers', 'transferfile', document_id)
    return total
def addAccessionEvent(fileUUID, transferUUID, date):
    """Insert a "registration" event carrying the transfer's accession id.

    Nothing is inserted when the accessionID stored for the transfer is
    empty or NULL.
    """
    query = """SELECT accessionID FROM Transfers WHERE transferUUID = '%s';""" % transferUUID
    accessionID = databaseInterface.queryAllSQL(query)[0][0]
    if not accessionID:
        return

    eventIdentifierUUID = str(uuid.uuid4())
    note = "accession#" + MySQLdb.escape_string(accessionID)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="registration",
        eventDateTime=date,
        eventDetail="",
        eventOutcome="",
        eventOutcomeDetailNote=note,
    )
def connect_and_remove_sip_transfer_files(uuid):
    """Delete the indexed 'transferfile' documents of a SIP's files.

    For every fileUUID stored for the SIP, the 'transfers' index is searched
    by 'fileuuid' and the first hit (if any) is deleted.
    """
    # get file UUIDs for each file in the SIP
    query = "SELECT fileUUID from Files WHERE sipUUID='" + MySQLdb.escape_string(uuid) + "'"
    fileRows = databaseInterface.queryAllSQL(query)
    if len(fileRows) == 0:
        return

    conn = connect_and_create_index('transfers')
    # cycle through file UUIDs and delete files from transfer backlog
    for fileRow in fileRows:
        fieldQuery = pyes.FieldQuery(pyes.FieldParameter('fileuuid', fileRow[0]))
        documents = conn.search_raw(query=fieldQuery)
        if len(documents['hits']['hits']) == 0:
            continue
        document_id = documents['hits']['hits'][0]['_id']
        conn.delete('transfers', 'transferfile', document_id)
def bridgeMagicChainLinksRecursiveAssist(node, magicLink, visitedNodes):
    """Walk the graph from node and draw arrows to magicLink from load links.

    The link pk is the text between the node name's first character and its
    first '}'. If that link matches the task type tested by the query, a
    brown arrow to magicLink is added and 1 is returned. Otherwise the walk
    continues through the unvisited neighbours and their counts are summed.
    visitedNodes is updated in place.
    """
    linkPK = node[1:node.find('}')]
    query = "SELECT MicroServiceChainLinks.pk FROM MicroServiceChainLinks JOIN TasksConfigs ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '6fe259c2-459d-4d4b-81a4-1b9daf7ee2e9' AND MicroServiceChainLinks.pk = '%s';" % linkPK

    # if it's loading it, set the load and return
    if len(databaseInterface.queryAllSQL(query)) > 0:
        addArrow(linkPK, magicLink, color="brown")
        return 1

    found = 0
    for neighbour in G.neighbors_iter(node):
        if neighbour not in visitedNodes:
            visitedNodes[neighbour] = None
            found += bridgeMagicChainLinksRecursiveAssist(neighbour, magicLink, visitedNodes)
    return found
def createWatchedDirectories():
    """Write one PlantUML diagram per row of the WatchedDirectories table.

    Each diagram is opened with an @startuml line whose name is derived from
    the watched directory path, titled with the path, filled by
    jobChainTextGet() for the row's chain and closed with @enduml.
    """
    global processedJobChainLinks
    query = """SELECT watchedDirectoryPath, chain, expectedType FROM WatchedDirectories;"""
    for watchedDirectoryPath, chain, expectedType in databaseInterface.queryAllSQL(query):
        # Drop the placeholder and flatten the slashes for the image name.
        imageStem = watchedDirectoryPath.replace("%watchDirectoryPath%", "")
        imageStem = imageStem.replace("/", "_")
        writePlant("@startuml WatchedDirectory-", imageStem + ".png")  # img/activity_img10.png
        writePlant("title " + watchedDirectoryPath)
        jobChainTextGet("(*) --> [" + watchedDirectoryPath + "]", chain)
        writePlant("@enduml")
def printFidoInsert(itemdirectoryPath): global runSQLInserts ext = findExtension(itemdirectoryPath).lower() if not ext: return fileID = findExistingFileID(ext) if not fileID: return fileID, validPreservationFormat, validAccessFormat = fileID FidoFileID = getFidoID(itemdirectoryPath).strip() if not FidoFileID: return #check for existing rule sql = """SELECT pk FROM FileIDs WHERE fileIDType = 'afdbee13-eec5-4182-8c6c-f5638ee290f3' AND description = '%s';""" % FidoFileID if databaseInterface.queryAllSQL(sql): a = "skip" #return if FidoFileID in idsDone: return fileIDUUID = uuid.uuid4().__str__() sql = """INSERT INTO FileIDs (pk, description, validPreservationFormat, validAccessFormat, fileIDType) VALUES ('%s', '%s', %s, %s, 'afdbee13-eec5-4182-8c6c-f5638ee290f3');""" % ( fileIDUUID, FidoFileID, validPreservationFormat, validAccessFormat) idsDone.append(FidoFileID) print sql if runSQLInserts: databaseInterface.runSQL(sql) FileIDsBySingleIDUUID = uuid.uuid4().__str__() sql = """INSERT INTO FileIDsBySingleID (pk, fileID, id, tool, toolVersion) VALUES ('%s', '%s', '%s', 'Fido', '1.1.2');""" % ( FileIDsBySingleIDUUID, fileIDUUID, FidoFileID) print sql if runSQLInserts: databaseInterface.runSQL(sql) printNewCommandRelationships(fileID, fileIDUUID) print
def printTikaInsert(itemdirectoryPath): global runSQLInserts ext = findExtension(itemdirectoryPath).lower() if not ext: return fileID = findExistingFileID(ext) if not fileID: return fileID, validPreservationFormat, validAccessFormat = fileID tikaFileID = getTikaID(itemdirectoryPath).strip() if not tikaFileID: return #check for existing rule sql = """SELECT pk FROM FileIDs WHERE fileIDType = '1d8f3bb3-da8a-4ef6-bac7-b65942df83fc' AND description = '%s';""" % tikaFileID if databaseInterface.queryAllSQL(sql): a = "skip" #return if tikaFileID in idsDone: return fileIDUUID = uuid.uuid4().__str__() sql = """INSERT INTO FileIDs (pk, description, validPreservationFormat, validAccessFormat, fileIDType) VALUES ('%s', '%s', %s, %s, '1d8f3bb3-da8a-4ef6-bac7-b65942df83fc');""" % ( fileIDUUID, tikaFileID, validPreservationFormat, validAccessFormat) idsDone.append(tikaFileID) print sql if runSQLInserts: databaseInterface.runSQL(sql) FileIDsBySingleIDUUID = uuid.uuid4().__str__() sql = """INSERT INTO FileIDsBySingleID (pk, fileID, id, tool, toolVersion) VALUES ('%s', '%s', '%s', 'Tika', '1.3');""" % (FileIDsBySingleIDUUID, fileIDUUID, tikaFileID) print sql if runSQLInserts: databaseInterface.runSQL(sql) printNewCommandRelationships(fileID, fileIDUUID) print
def createLoadMagic():
    """Write one PlantUML diagram per task config of taskType 3.

    For every (description, execute) row the global processedJobChainLinks is
    reset to an empty list before jobChainLinkTextGet() is called with the
    execute value converted to int.
    """
    global processedJobChainLinks
    query = """SELECT TasksConfigs.description, StandardTasksConfigs.execute FROM TasksConfigs JOIN StandardTasksConfigs ON TasksConfigs.taskTypePKReference = StandardTasksConfigs.pk WHERE TasksConfigs.taskType = 3;"""
    for description, chainLink in databaseInterface.queryAllSQL(query):
        processedJobChainLinks = []
        writePlant("@startuml LoadMagicLink-", description, "-", chainLink, ".png")  # img/activity_img10.png
        writePlant("title ", description, "-", chainLink)
        startText = "(*) --> [" + description + "]"
        jobChainLinkTextGet("", startText, int(chainLink), label="")
        writePlant("@enduml")
def loadAllLinks():
    """Add a graph node per chain link, then an arrow per default next link.

    Links whose pk is in excludedNodes get no node. Nodes are named
    "{pk}description" and recorded in linkUUIDtoNodeName. Arrows are added in
    a second pass, after every node exists.
    """
    query = """SELECT MicroServiceChainLinks.pk, MicroServiceChainLinks.defaultNextChainLink, TasksConfigs.description FROM MicroServiceChainLinks JOIN TasksConfigs ON currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType != '5e70152a-9c5b-4c17-b823-c9298c546eeb';"""
    links = databaseInterface.queryAllSQL(query)

    # First pass: create the nodes.
    for pk, _nextLink, description in links:
        if pk not in excludedNodes:
            nodeName = "{%s}%s" % (pk, description)
            G.add_node(nodeName,
                       URL="MicroServiceChainLinks/%s" % pk,
                       label=nodeName,
                       id=nodeName)
            linkUUIDtoNodeName[pk] = nodeName

    # Second pass: connect each link to its default successor, if it has one.
    for pk, nextLink, _description in links:
        if nextLink is not None:
            addArrow(pk, nextLink)
def startThread(threadNumber): """Setup a gearman client, for the thread.""" gm_worker = gearman.GearmanWorker( [config.get('MCPClient', "MCPArchivematicaServer")]) hostID = gethostname() + "_" + threadNumber.__str__() gm_worker.set_client_id(hostID) for key in supportedModules.iterkeys(): printOutputLock.acquire() print "registering:", '"' + key + '"' printOutputLock.release() gm_worker.register_task(key, executeCommand) #load transoder jobs sql = """SELECT CommandRelationships.pk FROM CommandRelationships JOIN Commands ON CommandRelationships.command = Commands.pk JOIN CommandsSupportedBy ON Commands.supportedBy = CommandsSupportedBy.pk WHERE CommandsSupportedBy.description = 'supported by default archivematica client';""" rows = databaseInterface.queryAllSQL(sql) if rows: for row in rows: CommandRelationshipsPK = row[0] key = "transcoder_cr%s" % (CommandRelationshipsPK.__str__()) printOutputLock.acquire() print "registering:", '"' + key + '"' printOutputLock.release() gm_worker.register_task( key, transcoderNormalizer.executeCommandReleationship) failMaxSleep = 30 failSleep = 1 failSleepIncrementor = 2 while True: try: gm_worker.work() except gearman.errors.ServerUnavailable as inst: print >> sys.stderr, inst.args print >> sys.stderr, "Retrying in %d seconds." % (failSleep) time.sleep(failSleep) if failSleep < failMaxSleep: failSleep += failSleepIncrementor
def setVariable(self, variable, variableValue, microServiceChainLink):
    """Create or update this unit's UnitVariables rows for a variable.

    A falsy variableValue is stored as the empty string and a falsy
    microServiceChainLink as SQL NULL. Every existing row for
    (self.unitType, self.UUID, variable) is updated; when there is none a new
    row with a generated pk is inserted.
    """
    variableValue = variableValue or ""
    if microServiceChainLink:
        linkSQL = "'%s'" % microServiceChainLink
    else:
        linkSQL = "NULL"
    variableValue = databaseInterface.MySQLdb.escape_string(variableValue)

    lookup = """SELECT pk FROM UnitVariables WHERE unitType = '%s' AND unitUUID = '%s' AND variable = '%s';""" % (
        self.unitType, self.UUID, variable)
    existing = databaseInterface.queryAllSQL(lookup)

    if not existing:
        insert = """INSERT INTO UnitVariables (pk, unitType, unitUUID, variable, variableValue, microserviceChainLink) VALUES ('%s', '%s', '%s', '%s', '%s', %s);""" % (
            str(uuid.uuid4()), self.unitType, self.UUID, variable,
            variableValue, linkSQL)
        databaseInterface.runSQL(insert)
        return

    for existingRow in existing:
        update = """UPDATE UnitVariables SET variable='%s', variableValue='%s', microServiceChainLink=%s WHERE pk = '%s'; """ % (
            variable, variableValue, linkSQL, existingRow[0])
        databaseInterface.runSQL(update)
def removePreservationFiles(SIPDirectory, SIPUUID): try: sql = """SELECT fileUUID, currentLocation FROM Files WHERE SIPUUID = '%s' AND removedTime = 0 AND fileGrpUse = 'preservation';""" % ( SIPUUID) files = databaseInterface.queryAllSQL(sql) for file in files: try: fileUUID, currentLocation = file sql = """UPDATE Files SET removedTime=NOW() WHERE fileUUID = '%s';""" % ( fileUUID) databaseInterface.runSQL(sql) os.remove( currentLocation.replace("%SIPDirectory%", SIPDirectory, 1)) except Exception as inst: traceback.print_exc(file=sys.stdout) print type(inst) # the exception instance print inst.args except Exception as inst: traceback.print_exc(file=sys.stdout) print type(inst) # the exception instance print inst.args
def updateDirectoryLocation(src, dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith): srcDB = src.replace(unitPath, unitPathReplaceWith) if not srcDB.endswith("/") and srcDB != unitPathReplaceWith: srcDB += "/" dstDB = dst.replace(unitPath, unitPathReplaceWith) if not dstDB.endswith("/") and dstDB != unitPathReplaceWith: dstDB += "/" sql = "SELECT Files.fileUUID, Files.currentLocation FROM Files WHERE removedTime = 0 AND Files.currentLocation LIKE '" + MySQLdb.escape_string(srcDB) + "%' AND " + unitIdentifierType + " = '" + unitIdentifier + "';" rows = databaseInterface.queryAllSQL(sql) for row in rows: fileUUID = row[0] location = row[1] destDB = location.replace(srcDB, dstDB) sql = """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(destDB), fileUUID) databaseInterface.runSQL(sql) if os.path.isdir(dst): if dst.endswith("/"): dst += "." else: dst += "/." print "moving: ", src, dst shutil.move(src, dst)
def findOrCreateSipInDB(path, waitSleep=dbWaitSleep): """Matches a directory to a database sip by it's appended UUID, or path. If it doesn't find one, it will create one""" UUID = "" path = path.replace(config.get('MCPServer', "sharedDirectory"), "%sharedPath%", 1) #find UUID on end of SIP path uuidLen = -36 if isUUID(path[uuidLen - 1:-1]): UUID = path[uuidLen - 1:-1] sql = """SELECT sipUUID FROM SIPs WHERE sipUUID = '""" + UUID + "';" rows = databaseInterface.queryAllSQL(sql) if not rows: databaseFunctions.createSIP(path, UUID=UUID) if UUID == "": #Find it in the database sql = """SELECT sipUUID FROM SIPs WHERE currentPath = '""" + MySQLdb.escape_string( path) + "';" #if waitSleep != 0: #time.sleep(waitSleep) #let db be updated by the microservice that moved it. c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if not row: print "Not opening existing SIP:", UUID, "-", path while row != None: UUID = row[0] print "Opening existing SIP:", UUID, "-", path row = c.fetchone() sqlLock.release() #Create it if UUID == "": UUID = databaseFunctions.createSIP(path) print "DEBUG creating sip", path, UUID return UUID
def getTrimFileDmdSec(baseDirectoryPath, fileGroupIdentifier, fileUUID):
    """Build a Dublin Core dmdSec element from a file's TRIM metadata XML.

    Looks up the single 'TRIM file metadata' file whose fileGrpUUID is
    fileUUID in the given SIP and copies its title, modification date and
    record number into dcterms title/date/identifier elements.
    Returns None, after a message on stderr, unless exactly one such file
    is found.
    """
    query = "SELECT currentLocation FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse='TRIM file metadata' AND fileGrpUUID = '%s';" % ('sipUUID', fileGroupIdentifier, fileUUID)
    matches = databaseInterface.queryAllSQL(query)
    if len(matches) != 1:
        print >>sys.stderr, "no metadata for original file: ", fileUUID
        return None

    dmdSec = etree.Element("dmdSec")
    mdWrap = etree.SubElement(dmdSec, "mdWrap")
    mdWrap.set("MDTYPE", "DC")
    xmlData = etree.SubElement(mdWrap, "xmlData")

    xmlFilePath = matches[0][0].replace('%SIPDirectory%', baseDirectoryPath, 1)
    dublincore = etree.SubElement(xmlData, "dublincore", attrib=None, nsmap={None: dctermsNS})
    document = etree.parse(os.path.join(baseDirectoryPath, xmlFilePath)).getroot()

    # Target dcterms element and the source element path, in output order.
    fieldMap = (
        ("title", "Document/TitleFreeTextPart"),
        ("date", "Document/DateModified"),
        ("identifier", "Document/RecordNumber"),
    )
    for termName, sourcePath in fieldMap:
        etree.SubElement(dublincore, dctermsBNS + termName).text = document.find(sourcePath).text
    return dmdSec
def storeReport(content, type, name, UUID):
    """Insert one row into Reports using a parameterized statement.

    content, type, name and UUID fill the content, unitType, unitName and
    unitIdentifier columns, in that order.
    """
    statement = """INSERT INTO Reports (content, unitType, unitName, unitIdentifier) VALUES (%s, %s, %s, %s)"""
    values = (content, type, name, UUID)
    databaseInterface.queryAllSQL(statement, values)
def createFileSec(directoryPath, structMapDiv):
    """Add the files under directoryPath to the METS fileSec and structMap.

    Files directly in directoryPath are processed first, ordered by the
    basename of their original location (for derived files, the original
    location of the file they derive from); sub-directories are handled
    afterwards by recursion, each under a new "Directory" div.

    For every file a div/fptr is added to structMapDiv, a GROUPID is worked
    out from the file's use, and a file/FLocat entry is added to the matching
    group in globalFileGrps. Files without a database row, without a GROUPID
    or with an unknown use increment the shared globalErrorCount.

    NOTE(review): the layout of this function was lost in this copy of the
    file; the nesting below was reconstructed from the statements themselves.
    Confirm it against version control before relying on it.
    """
    global fileNameToFileID
    global trimStructMap
    global trimStructMapObjects
    global globalDmdSecCounter
    global globalAmdSecCounter
    global globalDigiprovMDCounter
    global dmdSecs
    global amdSecs
    # Sub-directories, processed after all files of this directory.
    delayed = []
    # "original" file elements created for this directory.
    filesInThisDirectory = []
    dspaceMetsDMDID = None
    directoryContents = os.listdir(directoryPath)
    # (originalName, item) pairs used to order the files.
    directoryContentsTuples = []
    for item in directoryContents:
        itemdirectoryPath = os.path.join(directoryPath, item)
        if os.path.isdir(itemdirectoryPath):
            delayed.append(item)
        elif os.path.isfile(itemdirectoryPath):
            #find original file name
            directoryPathSTR = itemdirectoryPath.replace(
                baseDirectoryPath, baseDirectoryPathString, 1)
            sql = """SELECT Related.originalLocation AS 'derivedFromOriginalLocation', Current.originalLocation FROM Files AS Current LEFT OUTER JOIN Derivations ON Current.fileUUID = Derivations.derivedFileUUID LEFT OUTER JOIN Files AS Related ON Derivations.sourceFileUUID = Related.fileUUID WHERE Current.removedTime = 0 AND Current.%s = '%s' AND Current.currentLocation = '%s';""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(directoryPathSTR))
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            if row == None:
                print >> sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
                sharedVariablesAcrossModules.globalErrorCount += 1
                sqlLock.release()
                continue
            while row != None:
                #add to files in this directory tuple list
                derivedFromOriginalName = row[0]
                originalLocation = row[1]
                if derivedFromOriginalName != None:
                    originalLocation = derivedFromOriginalName
                originalName = os.path.basename(
                    originalLocation
                ) + u"/"  #+ u"/" keeps normalized after original / is very uncommon in a file name
                directoryContentsTuples.append((
                    originalName,
                    item,
                ))
                row = c.fetchone()
            sqlLock.release()

    #order files by their original name
    for originalName, item in sorted(
            directoryContentsTuples,
            key=lambda listItems: listItems[0],
            cmp=sharedVariablesAcrossModules.collator.compare):
        #item = unicode(item)
        itemdirectoryPath = os.path.join(directoryPath, item)
        #myuuid = uuid.uuid4()
        myuuid = ""
        DMDIDS = ""
        #directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath + "objects", "objects", 1)
        directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, baseDirectoryPathString, 1)
        sql = """SELECT fileUUID, fileGrpUse, fileGrpUUID, Files.transferUUID, label, originalLocation, Transfers.type FROM Files LEFT OUTER JOIN Transfers ON Files.transferUUID = Transfers.transferUUID WHERE removedTime = 0 AND %s = '%s' AND Files.currentLocation = '%s';""" % (
            fileGroupType, fileGroupIdentifier,
            MySQLdb.escape_string(directoryPathSTR))
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        if row == None:
            print >> sys.stderr, "No uuid for file: \"", directoryPathSTR, "\""
            sharedVariablesAcrossModules.globalErrorCount += 1
            sqlLock.release()
            continue
        # If several rows match, the values of the last row read are kept.
        while row != None:
            myuuid = row[0]
            use = row[1]
            fileGrpUUID = row[2]
            transferUUID = row[3]
            label = row[4]
            originalLocation = row[5]
            typeOfTransfer = row[6]
            row = c.fetchone()
        sqlLock.release()
        filename = ''.join(quoteattr(item).split("\"")[1:-1])
        # From here on directoryPathSTR is the path relative to the base directory.
        directoryPathSTR = itemdirectoryPath.replace(baseDirectoryPath, "", 1)
        #print filename, directoryPathSTR
        # The first TRIM file seen creates the shared logical structMap together
        # with its dmdSec and amdSec.
        if typeOfTransfer == "TRIM" and trimStructMap == None:
            trimStructMap = etree.Element("structMap",
                                          attrib={
                                              "TYPE": "logical",
                                              "ID": "structMap_2",
                                              "LABEL": "Hierarchical arrangement"
                                          })
            trimStructMapObjects = etree.SubElement(trimStructMap,
                                                    "div",
                                                    attrib={
                                                        "TYPE": "File",
                                                        "LABEL": "objects"
                                                    })
            trimDmdSec = getTrimDmdSec(baseDirectoryPath, fileGroupIdentifier)
            globalDmdSecCounter += 1
            dmdSecs.append(trimDmdSec)
            ID = "dmdSec_" + globalDmdSecCounter.__str__()
            trimDmdSec.set("ID", ID)
            trimStructMapObjects.set("DMDID", ID)
            # ==
            trimAmdSec = etree.Element("amdSec")
            globalAmdSecCounter += 1
            amdSecs.append(trimAmdSec)
            ID = "amdSec_" + globalAmdSecCounter.__str__()
            trimAmdSec.set("ID", ID)
            digiprovMD = getTrimAmdSec(baseDirectoryPath, fileGroupIdentifier)
            globalDigiprovMDCounter += 1
            digiprovMD.set("ID", "digiprovMD_" + globalDigiprovMDCounter.__str__())
            trimAmdSec.append(digiprovMD)
            trimStructMapObjects.set("ADMID", ID)
        FILEID = "%s-%s" % (item, myuuid)
        # An underscore is prepended when the id would start with a digit.
        if FILEID[0].isdigit():
            FILEID = "_" + FILEID
        #<fptr FILEID="file1-UUID"/>
        fileDiv = etree.SubElement(structMapDiv, "div")
        if label != None:
            fileDiv.set("LABEL", label)
        fileDiv.set("TYPE", "Item")
        newChild(fileDiv, "fptr", sets=[("FILEID", FILEID)])
        fileNameToFileID[item] = FILEID
        # Work out the GROUPID (and possibly remap the use) for this file.
        GROUPID = ""
        if fileGrpUUID:
            GROUPID = "Group-%s" % (fileGrpUUID)
            if use == "TRIM file metadata":
                use = "metadata"
        elif use == "original" or use == "submissionDocumentation" or use == "metadata" or use == "maildirFile":
            GROUPID = "Group-%s" % (myuuid)
            if use == "maildirFile":
                use = "original"
            if use == "original":
                DMDIDS = createDMDIDSFromCSVParsedMetadataFiles(
                    originalLocation.replace('%transferDirectory%', "", 1))
                if DMDIDS:
                    fileDiv.set("DMDID", DMDIDS)
                if typeOfTransfer == "TRIM":
                    trimFileDiv = etree.SubElement(trimStructMapObjects,
                                                   "div",
                                                   attrib={"TYPE": "Item"})
                    trimFileDmdSec = getTrimFileDmdSec(baseDirectoryPath,
                                                       fileGroupIdentifier,
                                                       myuuid)
                    globalDmdSecCounter += 1
                    dmdSecs.append(trimFileDmdSec)
                    ID = "dmdSec_" + globalDmdSecCounter.__str__()
                    trimFileDmdSec.set("ID", ID)
                    trimFileDiv.set("DMDID", ID)
                    etree.SubElement(trimFileDiv,
                                     "fptr",
                                     attrib={"FILEID": FILEID})
        elif use == "preservation":
            # Group with the file this one is derived from (column 1 of Derivations).
            sql = "SELECT * FROM Derivations WHERE derivedFileUUID = '" + myuuid + "';"
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                GROUPID = "Group-%s" % (row[1])
                row = c.fetchone()
            sqlLock.release()
        elif use == "license" or use == "text/ocr" or use == "DSPACEMETS":
            # Group with an original file located in the same original directory.
            sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse = 'original' AND originalLocation LIKE '%s/%%'""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(
                    os.path.dirname(originalLocation)).replace("%", "\%"))
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                GROUPID = "Group-%s" % (row[0])
                row = c.fetchone()
            sqlLock.release()
        elif use == "service":
            # Group with the original whose location equals this file's path with
            # the "service/" component removed, up to and including the last ".".
            fileFileIDPath = itemdirectoryPath.replace(
                baseDirectoryPath + "objects/service/",
                baseDirectoryPathString + "objects/")
            objectNameExtensionIndex = fileFileIDPath.rfind(".")
            fileFileIDPath = fileFileIDPath[:objectNameExtensionIndex + 1]
            sql = """SELECT fileUUID FROM Files WHERE removedTime = 0 AND %s = '%s' AND fileGrpUse = 'original' AND currentLocation LIKE '%s%%'""" % (
                fileGroupType, fileGroupIdentifier,
                MySQLdb.escape_string(fileFileIDPath.replace("%", "\%")))
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                GROUPID = "Group-%s" % (row[0])
                row = c.fetchone()
            sqlLock.release()
        elif use == "TRIM container metadata":
            GROUPID = "Group-%s" % (myuuid)
            use = "metadata"
        if transferUUID:
            sql = "SELECT type FROM Transfers WHERE transferUUID = '%s';" % (
                transferUUID)
            rows = databaseInterface.queryAllSQL(sql)
            if rows[0][0] == "Dspace":
                if use == "DSPACEMETS":
                    use = "submissionDocumentation"
                    admidApplyTo = None
                    if GROUPID == "":  #is an AIP identifier
                        GROUPID = myuuid
                        admidApplyTo = structMapDiv.getparent()
                    LABEL = "mets.xml-%s" % (GROUPID)
                    dmdSec, ID = createMDRefDMDSec(LABEL, itemdirectoryPath,
                                                   directoryPathSTR)
                    dmdSecs.append(dmdSec)
                    if admidApplyTo != None:
                        admidApplyTo.set("DMDID", ID)
                    else:
                        # Applied to this directory's originals after the loop.
                        dspaceMetsDMDID = ID
        if GROUPID == "":
            sharedVariablesAcrossModules.globalErrorCount += 1
            print >> sys.stderr, "No groupID for file: \"", directoryPathSTR, "\""
        if use not in globalFileGrps:
            print >> sys.stderr, "Invalid use: \"%s\"" % (use)
            sharedVariablesAcrossModules.globalErrorCount += 1
        else:
            file = newChild(globalFileGrps[use],
                            "file",
                            sets=[("ID", FILEID), ("GROUPID", GROUPID)])
            if use == "original":
                filesInThisDirectory.append(file)
            #<Flocat xlink:href="objects/file1-UUID" locType="other" otherLocType="system"/>
            Flocat = newChild(file,
                              "FLocat",
                              sets=[(xlinkBNS + "href", directoryPathSTR),
                                    ("LOCTYPE", "OTHER"),
                                    ("OTHERLOCTYPE", "SYSTEM")])
            if includeAmdSec:
                AMD, ADMID = getAMDSec(myuuid, directoryPathSTR, use,
                                       fileGroupType, fileGroupIdentifier,
                                       transferUUID, itemdirectoryPath,
                                       typeOfTransfer)
                amdSecs.append(AMD)
                file.set("ADMID", ADMID)
    # A DSpace mets.xml found in this directory describes all of its originals.
    if dspaceMetsDMDID != None:
        for file in filesInThisDirectory:
            file.set("DMDID", dspaceMetsDMDID)
    # Recurse into the sub-directories, in collation order.
    for item in sorted(delayed,
                       cmp=sharedVariablesAcrossModules.collator.compare):
        itemdirectoryPath = os.path.join(directoryPath, item)
        directoryDiv = newChild(structMapDiv,
                                "div",
                                sets=[("TYPE", "Directory"), ("LABEL", item)])
        DMDIDS = createDMDIDSFromCSVParsedMetadataDirectories(
            itemdirectoryPath.replace(baseDirectoryPath, "", 1))
        if DMDIDS:
            directoryDiv.set("DMDID", DMDIDS)
        createFileSec(itemdirectoryPath, directoryDiv)
# @package Archivematica # @subpackage archivematicaClientScript # @author Joseph Perry <*****@*****.**> import os import sys import uuid import shutil sys.path.append("/usr/lib/archivematica/archivematicaCommon") import databaseInterface if __name__ == '__main__': unitUUID = sys.argv[1] filePath = sys.argv[2] uuidLen = 36 basename = os.path.basename(filePath) fileFauxUUID = basename[:uuidLen] fileName = basename[uuidLen:] dirname = os.path.dirname(filePath) sql = """SELECT fileUUID FROM FauxFileIDsMap WHERE fauxSIPUUID='%s' AND fauxFileUUID='%s';""" % (unitUUID, fileFauxUUID) rows = databaseInterface.queryAllSQL(sql) if len(rows) != 1: print >>sys.stderr, "Wrong rows returned", sql, rows exit(-1) originalFileUUID = rows[0][0] dst = os.path.join(dirname, originalFileUUID + fileName) print basename, " -> ", originalFileUUID + fileName shutil.move(filePath, dst)
def index_transfer_files(conn, uuid, pathToTransfer, index, type):
    """Index every regular file found under pathToTransfer.

    conn -- search connection; conn.index() is called once per file with
        bulk=True and conn.refresh() once at the end if anything was indexed.
    uuid -- transfer UUID, used to look up the accession id and file UUIDs.
    pathToTransfer -- directory walked with list_files_in_dir().
    index, type -- passed through to conn.index().

    A file with no matching Files row is indexed with an empty 'fileuuid'.
    Returns the number of files indexed.
    """
    filesIndexed = 0
    ingest_date = str(datetime.datetime.today())[0:10]
    create_time = time.time()

    # get accessionId from transfers table using UUID
    accession_id = ''
    sql = "SELECT accessionID from Transfers WHERE transferUUID='" + MySQLdb.escape_string(uuid) + "'"
    rows = databaseInterface.queryAllSQL(sql)
    if len(rows) > 0:
        accession_id = rows[0][0]

    # File UUIDs are looked up one file at a time below. The query that
    # preloaded every (currentLocation, fileUUID) pair into a dict nobody
    # read has been removed, along with the unused transfer-name computation.
    for filepath in list_files_in_dir(pathToTransfer):
        if not os.path.isfile(filepath):
            continue

        relative_path = '%transferDirectory%objects' + filepath.replace(pathToTransfer, '')
        sql = "SELECT fileUUID FROM Files WHERE currentLocation='" + MySQLdb.escape_string(relative_path) + "' AND transferUUID='" + MySQLdb.escape_string(uuid) + "'"
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows) > 0:
            file_uuid = rows[0][0]
        else:
            file_uuid = ''

        indexData = {
            'filepath': filepath,
            'filename': os.path.basename(filepath),
            'fileuuid': file_uuid,
            'sipuuid': uuid,
            'accessionid': accession_id,
            'status': '',
            'origin': getDashboardUUID(),
            'ingestdate': ingest_date,
            'created': create_time,
        }

        fileName, fileExtension = os.path.splitext(filepath)
        if fileExtension != '':
            # Stored without the leading dot, lower-cased.
            indexData['fileExtension'] = fileExtension[1:].lower()

        conn.index(indexData, index, type, bulk=True)
        filesIndexed = filesIndexed + 1

    if filesIndexed > 0:
        conn.refresh()
    return filesIndexed
def createDigiprovMD(fileUUID):
    """Return one digiprovMD element per Events row recorded for fileUUID.

    Each element wraps a PREMIS 2.2 event (identifier, type, date/time,
    detail, outcome and outcome note). An "Archivematica user pk" linking
    agent is added when the event row has a linkingAgentIdentifier, followed
    by one linking agent per row of the Agents table. The global
    globalDigiprovMDCounter is incremented for every element to build its ID.
    """
    global globalDigiprovMDCounter
    ret = []
    #EVENTS
    sql = "SELECT pk, fileUUID, eventIdentifierUUID, eventType, eventDateTime, eventDetail, eventOutcome, eventOutcomeDetailNote, linkingAgentIdentifier FROM Events WHERE fileUUID = '" + fileUUID + "';"
    events = databaseInterface.queryAllSQL(sql)
    if not events:
        return ret

    # The Agents table does not depend on the event, so it is read once here
    # instead of once per event.
    agents = databaseInterface.queryAllSQL(
        """SELECT agentIdentifierType, agentIdentifierValue, agentName, agentType FROM Agents;""")

    for eventRow in events:
        digiprovMD = etree.Element("digiprovMD")
        ret.append(digiprovMD)  #newChild(amdSec, "digiprovMD")
        #digiprovMD.set("ID", "digiprov-"+ os.path.basename(filename) + "-" + fileUUID)
        globalDigiprovMDCounter += 1
        digiprovMD.set("ID", "digiprovMD_" + globalDigiprovMDCounter.__str__())

        mdWrap = newChild(digiprovMD, "mdWrap")
        mdWrap.set("MDTYPE", "PREMIS:EVENT")
        xmlData = newChild(mdWrap, "xmlData")
        event = etree.SubElement(xmlData, "event", nsmap={None: premisNS})
        event.set(
            xsiBNS + "schemaLocation",
            premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
        event.set("version", "2.2")

        eventIdentifier = etree.SubElement(event, "eventIdentifier")
        etree.SubElement(eventIdentifier, "eventIdentifierType").text = "UUID"
        etree.SubElement(eventIdentifier, "eventIdentifierValue").text = eventRow[2]

        etree.SubElement(event, "eventType").text = eventRow[3]
        # The date/time separator becomes "T".
        etree.SubElement(event, "eventDateTime").text = eventRow[4].__str__().replace(" ", "T")
        etree.SubElement(event, "eventDetail").text = escape(eventRow[5])

        eventOutcomeInformation = etree.SubElement(event, "eventOutcomeInformation")
        etree.SubElement(eventOutcomeInformation, "eventOutcome").text = eventRow[6]
        eventOutcomeDetail = etree.SubElement(eventOutcomeInformation, "eventOutcomeDetail")
        etree.SubElement(eventOutcomeDetail, "eventOutcomeDetailNote").text = escape(eventRow[7])

        if eventRow[8]:
            linkingAgentIdentifier = etree.SubElement(event, "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierType").text = "Archivematica user pk"
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierValue").text = eventRow[8].__str__()

        #linkingAgentIdentifier
        for agent in agents:
            linkingAgentIdentifier = etree.SubElement(event, "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierType").text = agent[0]
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierValue").text = agent[1]
    return ret
def identifyCommands(fileName):
    """Identify file type(s) and return the matching command relationships.

    Rows of (CR.pk, CR.command, CR.GroupMember) are collected first for every
    PRONOM id read from the file's PREMIS metadata and then for the file
    extension held by the transcoder module, both restricted to
    opts.commandClassifications. If reading the PRONOM ids fails, the rows
    gathered so far are discarded and only the extension lookup is used.

    When nothing matches, the fallback depends on the classification and may
    terminate the process via exit() (see the branches below).

    The fileName parameter is not used by the body.

    NOTE(review): the layout of this function was lost in this copy of the
    file; the nesting below was reconstructed from the statements themselves.
    In particular, confirm whether the exit(0) of the "preservation" branch
    belongs to the else clause only.
    """
    ret = []
    premisFile = opts.logsDirectory + "fileMeta/" + opts.fileUUID + ".xml"
    try:
        for pronomID in getPronomsFromPremis(premisFile):
            sql = """SELECT CR.pk, CR.command, CR.GroupMember FROM CommandRelationships AS CR JOIN FileIDs ON CR.fileID=FileIDs.pk JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk JOIN FileIDsByPronom AS FIBP ON FileIDs.pk = FIBP.FileIDs WHERE FIBP.FileID = '""" + pronomID.__str__() + """' AND CommandClassifications.classification = '""" + opts.commandClassifications + """';"""
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                ret.append(row)
                row = c.fetchone()
            sqlLock.release()
    except:
        # Any failure while reading the PRONOM ids discards the partial result.
        print >> sys.stderr, "Failed to retrieve pronomIDs."
        ret = []
    if transcoder.fileExtension:
        sql = """SELECT CR.pk, CR.command, CR.GroupMember FROM CommandRelationships AS CR JOIN FileIDs ON CR.fileID=FileIDs.pk JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk JOIN FileIDsByExtension AS FIBE ON FileIDs.pk = FIBE.FileIDs WHERE FIBE.Extension = '""" + transcoder.fileExtension.__str__() + """' AND CommandClassifications.classification = '""" + opts.commandClassifications + """';"""
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            ret.append(row)
            row = c.fetchone()
        sqlLock.release()
    # No command relationship found: fall back according to the classification.
    if not len(ret):
        if opts.commandClassifications == "preservation":
            if inPreservationFormat():
                print "Already in preservation format."
            else:
                print >> sys.stderr, "Unable to verify archival readiness."
                #Issue 528: related to exit code
                exit(0)
        elif opts.commandClassifications == "access":
            # Run the "copy to access directory" command(s); a non-zero exit
            # code from the copy ends the process with that code.
            sql = """SELECT CR.pk, CR.command, CR.GroupMember FROM CommandRelationships AS CR JOIN Commands AS C ON CR.command = C.pk WHERE C.description = 'Copying file to access directory.';"""
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                cl = transcoder.CommandLinker(row)
                copyExitCode = cl.execute()
                if copyExitCode:
                    exit(copyExitCode)
            if inAccessFormat():
                print "Already in access format."
                exit(0)
            else:
                print >> sys.stderr, "Unable to verify access readiness."
                #Issue 528: related to exit code
                exit(0)
        elif opts.commandClassifications == "thumbnail":
            #use default thumbnail
            print "Using default thumbnail"
            sql = """SELECT CR.pk, CR.command, CR.GroupMember FROM CommandRelationships AS CR JOIN Commands AS C ON CR.command = C.pk WHERE C.description = 'Using default thumbnail.';"""
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                cl = transcoder.CommandLinker(row)
                copyExitCode = cl.execute()
                # The process ends with the exit code of the first command run.
                exit(copyExitCode)
    return ret
def process_transfer(request, transfer_uuid):
    """Create a SIP from the objects of a transfer and answer with JSON.

    For a logged-in user the transfer's ``objects`` directory is moved into a
    freshly structured SIP directory under the processing directory, a SIPs
    row is created, every matching Files row is re-pointed from
    ``%transferDirectory%`` to ``%SIPDirectory%`` and assigned the new SIP
    UUID, ``processingMCP.xml`` is copied over, and the SIP is finally moved
    into ``watchedDirectories/SIPCreation/SIPsUnderConstruction``.

    Returns an ``HttpResponse`` whose JSON body has a ``message`` and, when
    the user is not logged in, ``error`` set to true.
    """
    response = {}

    if request.user.id:
        # get transfer info
        transfer = models.Transfer.objects.get(uuid=transfer_uuid)
        transfer_path = transfer.currentlocation.replace(
            '%sharedPath%',
            helpers.get_server_config_value('sharedDirectory'))

        import MySQLdb
        import databaseInterface
        import databaseFunctions
        import shutil

        from archivematicaCreateStructuredDirectory import createStructuredDirectory

        createStructuredDirectory(transfer_path,
                                  createManualNormalizedDirectories=False)

        processingDirectory = helpers.get_server_config_value(
            'processingDirectory')
        # transfer_path is expected to end with "/"; the directory name ends
        # with a 37-character suffix that is stripped to get the name.
        # NOTE(review): presumably "-" plus a 36-character UUID; confirm.
        transfer_directory_name = os.path.basename(transfer_path[:-1])
        transfer_name = transfer_directory_name[:-37]
        sharedPath = helpers.get_server_config_value('sharedDirectory')

        tmpSIPDir = os.path.join(processingDirectory, transfer_name) + "/"
        processSIPDirectory = os.path.join(
            sharedPath,
            'watchedDirectories/SIPCreation/SIPsUnderConstruction') + '/'
        createStructuredDirectory(tmpSIPDir,
                                  createManualNormalizedDirectories=False)
        objectsDirectory = os.path.join(transfer_path, 'objects') + '/'

        # A new SIP row is always created; no lookup for an existing SIP at
        # the destination path is performed.
        sipUUID = uuid.uuid4().__str__()
        destSIPDir = os.path.join(processSIPDirectory, transfer_name) + "/"
        lookup_path = destSIPDir.replace(sharedPath, '%sharedPath%')
        databaseFunctions.createSIP(lookup_path, sipUUID)

        # move the objects to the SIPDir
        for item in os.listdir(objectsDirectory):
            shutil.move(os.path.join(objectsDirectory, item),
                        os.path.join(tmpSIPDir, "objects", item))

        # get the database list of files in the objects directory
        # for each file, confirm it's in the SIP objects directory, and
        # update the current location / owning SIP
        # transfer_uuid is escaped like the values of the UPDATE below.
        sql = ("SELECT fileUUID, currentLocation FROM Files "
               "WHERE removedTime = 0 "
               "AND currentLocation LIKE '\\%transferDirectory\\%objects%' "
               "AND transferUUID = '" + MySQLdb.escape_string(transfer_uuid) + "'")
        for row in databaseInterface.queryAllSQL(sql):
            fileUUID = row[0]
            currentPath = databaseFunctions.deUnicode(row[1])
            currentSIPFilePath = currentPath.replace("%transferDirectory%",
                                                     tmpSIPDir)
            if os.path.isfile(currentSIPFilePath):
                sql = """UPDATE Files SET currentLocation='%s', sipUUID='%s' WHERE fileUUID='%s'""" % (
                    MySQLdb.escape_string(
                        currentPath.replace("%transferDirectory%",
                                            "%SIPDirectory%")),
                    sipUUID,
                    fileUUID)
                databaseInterface.runSQL(sql)
            else:
                print >> sys.stderr, "file not found: ", currentSIPFilePath

        # copy processingMCP.xml file
        src = os.path.join(os.path.dirname(objectsDirectory[:-1]),
                           "processingMCP.xml")
        dst = os.path.join(tmpSIPDir, "processingMCP.xml")
        shutil.copy(src, dst)

        # moveSIPTo processSIPDirectory
        shutil.move(tmpSIPDir, destSIPDir)

        elasticSearchFunctions.connect_and_change_transfer_file_status(
            transfer_uuid, '')

        response['message'] = 'SIP ' + sipUUID + ' created.'
    else:
        response['error'] = True
        response['message'] = 'Must be logged in.'

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json')
objectsDirectory = sys.argv[1] transferName = sys.argv[2] transferUUID = sys.argv[3] processingDirectory = sys.argv[4] autoProcessSIPDirectory = sys.argv[5] sharedPath = sys.argv[6] sipName = transferName tmpSIPDir = os.path.join(processingDirectory, sipName) + "/" destSIPDir = os.path.join(autoProcessSIPDirectory, sipName) + "/" createStructuredDirectory(tmpSIPDir, createManualNormalizedDirectories=False) #create row in SIPs table if one doesn't already exist lookup_path = destSIPDir.replace(sharedPath, '%sharedPath%') sql = """SELECT sipUUID FROM SIPs WHERE currentPath = '""" + MySQLdb.escape_string(lookup_path) + "';" rows = databaseInterface.queryAllSQL(sql) if len(rows) > 0: row = rows[0] sipUUID = row[0] else: sipUUID = uuid.uuid4().__str__() databaseFunctions.createSIP(lookup_path, sipUUID) #move the objects to the SIPDir for item in os.listdir(objectsDirectory): shutil.move(os.path.join(objectsDirectory, item), os.path.join(tmpSIPDir, "objects", item)) #get the database list of files in the objects directory #for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP' sql = """SELECT fileUUID, currentLocation FROM Files WHERE removedTime = 0 AND currentLocation LIKE '\%transferDirectory\%objects%' AND transferUUID = '""" + transferUUID + "'" for row in databaseInterface.queryAllSQL(sql):
def archivematicaGetRights(metadataAppliesToList, fileUUID):
    """Build PREMIS ``rightsStatement`` elements for the given units.

    ``metadataAppliesToList`` is a list of ``(identifier, type)`` pairs:
    [(fileUUID, fileUUIDTYPE), (sipUUID, sipUUIDTYPE), (transferUUID, transferUUIDType)]

    One element is produced per row of RightsStatement left-joined with its
    copyright and license tables.  Each element gets an identifier (a fresh
    UUID when none is stored), a rights basis, the basis-specific
    information, the rights granted and a linking object identifier pointing
    at ``fileUUID``.

    Returns the list of created elements (empty when nothing applies).
    """
    ret = []
    rightsBasisActuallyOther = ["Policy", "Donor"]
    # The same text is used as the SELECT list and, split, as the keys of
    # valueDic, so column order and dictionary keys cannot drift apart.
    columns = "RightsStatement.pk, rightsStatementIdentifierType, rightsStatementIdentifierType, rightsStatementIdentifierValue, rightsBasis, copyrightStatus, copyrightJurisdiction, copyrightStatusDeterminationDate, licenseTerms, copyrightApplicableStartDate, copyrightApplicableEndDate, licenseApplicableStartDate, licenseApplicableEndDate"
    key = columns.split(", ")

    for metadataAppliesToidentifier, metadataAppliesToType in metadataAppliesToList:
        sql = ("SELECT %s FROM RightsStatement "
               "LEFT JOIN RightsStatementCopyright ON RightsStatementCopyright.fkRightsStatement = RightsStatement.pk "
               "LEFT JOIN RightsStatementLicense ON RightsStatementLicense.fkRightsStatement = RightsStatement.pk "
               "WHERE metadataAppliesToidentifier = '%s' "
               "AND metadataAppliesToType = '%s';") % (
                   columns, metadataAppliesToidentifier, metadataAppliesToType)
        rows = databaseInterface.queryAllSQL(sql)
        if not rows:
            continue

        for row in rows:
            valueDic = dict(zip(key, row))
            statementPK = valueDic["RightsStatement.pk"]

            rightsStatement = etree.Element("rightsStatement",
                                            nsmap={None: premisNS})
            rightsStatement.set(
                xsiBNS + "schemaLocation",
                premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
            # No "version" attribute is set: cvc-complex-type.3.2.2 reports
            # that it is not allowed to appear in element 'rightsStatement'.
            ret.append(rightsStatement)

            rightsStatementIdentifier = etree.SubElement(
                rightsStatement, "rightsStatementIdentifier")
            if valueDic["rightsStatementIdentifierValue"]:
                identifierType = valueDic["rightsStatementIdentifierType"]
                identifierValue = valueDic["rightsStatementIdentifierValue"]
            else:
                identifierType = "UUID"
                identifierValue = uuid.uuid4().__str__()
            etree.SubElement(rightsStatementIdentifier,
                             "rightsStatementIdentifierType").text = identifierType
            etree.SubElement(rightsStatementIdentifier,
                             "rightsStatementIdentifierValue").text = identifierValue

            # "Policy" and "Donor" are emitted as the basis "Other".
            if valueDic["rightsBasis"] in rightsBasisActuallyOther:
                etree.SubElement(rightsStatement, "rightsBasis").text = "Other"
            else:
                etree.SubElement(rightsStatement,
                                 "rightsBasis").text = valueDic["rightsBasis"]

            basis = valueDic["rightsBasis"].lower()
            if basis == "copyright":
                _appendCopyrightInformation(rightsStatement, valueDic)
            elif basis == "license":
                _appendLicenseInformation(rightsStatement, valueDic)
            elif basis == "statute":
                # 4.1.5 statuteInformation (O, R)
                getstatuteInformation(statementPK, rightsStatement)
            elif basis in ("donor", "policy", "other"):
                _appendOtherRightsInformation(rightsStatement, valueDic,
                                              rightsBasisActuallyOther)

            # 4.1.6 rightsGranted (O, R)
            getrightsGranted(statementPK, rightsStatement)

            # 4.1.7 linkingObjectIdentifier (O, R)
            linkingObjectIdentifier = etree.SubElement(
                rightsStatement, "linkingObjectIdentifier")
            etree.SubElement(linkingObjectIdentifier,
                             "linkingObjectIdentifierType").text = "UUID"
            etree.SubElement(linkingObjectIdentifier,
                             "linkingObjectIdentifierValue").text = fileUUID
    return ret


def _appendApplicableDates(dates, startDate, endDate, endDateOpen):
    """Add startDate/endDate children to *dates*; an open end date wins."""
    if startDate:
        etree.SubElement(dates, "startDate").text = formatDate(startDate)
    if endDateOpen:
        etree.SubElement(dates, "endDate").text = "OPEN"
    elif endDate:
        etree.SubElement(dates, "endDate").text = formatDate(endDate)


def _appendNotes(parent, tag, sql):
    """Add one *tag* child to *parent* per row returned by *sql* (repeatable notes)."""
    for noteRow in databaseInterface.queryAllSQL(sql):
        etree.SubElement(parent, tag).text = noteRow[0]


def _appendCopyrightInformation(rightsStatement, valueDic):
    """Add a copyrightInformation element per RightsStatementCopyright row."""
    sql = """SELECT pk, copyrightStatus, copyrightJurisdiction, copyrightStatusDeterminationDate, copyrightApplicableStartDate, copyrightApplicableEndDate, copyrightApplicableEndDateOpen FROM RightsStatementCopyright WHERE fkRightsStatement = %d""" % (
        valueDic["RightsStatement.pk"])
    for copyrightRow in databaseInterface.queryAllSQL(sql):
        copyrightInformation = etree.SubElement(rightsStatement,
                                                "copyrightInformation")
        # Status, jurisdiction and dates come from the joined statement row
        # (valueDic); only the pk and the open-end flag come from this row.
        etree.SubElement(copyrightInformation,
                         "copyrightStatus").text = valueDic["copyrightStatus"]

        copyrightJurisdiction = valueDic["copyrightJurisdiction"]
        copyrightJurisdictionCode = getCodeForCountry(
            copyrightJurisdiction.__str__().upper())
        if copyrightJurisdictionCode is not None:
            copyrightJurisdiction = copyrightJurisdictionCode
        etree.SubElement(copyrightInformation,
                         "copyrightJurisdiction").text = copyrightJurisdiction
        etree.SubElement(
            copyrightInformation,
            "copyrightStatusDeterminationDate").text = formatDate(
                valueDic["copyrightStatusDeterminationDate"])

        # copyrightNote Repeatable
        _appendNotes(
            copyrightInformation, "copyrightNote",
            "SELECT copyrightNote FROM RightsStatementCopyrightNote WHERE fkRightsStatementCopyrightInformation = %d;" % (
                copyrightRow[0]))

        # RightsStatementCopyrightDocumentationIdentifier
        getDocumentationIdentifier(valueDic["RightsStatement.pk"],
                                   copyrightInformation)

        copyrightApplicableDates = etree.SubElement(
            copyrightInformation, "copyrightApplicableDates")
        _appendApplicableDates(copyrightApplicableDates,
                               valueDic["copyrightApplicableStartDate"],
                               valueDic["copyrightApplicableEndDate"],
                               copyrightRow[6])  # copyrightApplicableEndDateOpen


def _appendLicenseInformation(rightsStatement, valueDic):
    """Add a licenseInformation element per license/documentation-identifier row."""
    sql = """SELECT licenseTerms, licenseApplicableStartDate, licenseApplicableEndDate, licenseDocumentationIdentifierType, licenseDocumentationIdentifierValue, RightsStatementLicense.pk, licenseDocumentationIdentifierRole, licenseApplicableEndDateOpen FROM RightsStatementLicense JOIN RightsStatementLicenseDocumentationIdentifier ON RightsStatementLicenseDocumentationIdentifier.fkRightsStatementLicense = RightsStatementLicense.pk WHERE RightsStatementLicense.fkRightsStatement = %d;""" % (
        valueDic["RightsStatement.pk"])
    for licenseRow in databaseInterface.queryAllSQL(sql):
        licenseInformation = etree.SubElement(rightsStatement,
                                              "licenseInformation")

        licenseDocumentIdentifier = etree.SubElement(
            licenseInformation, "licenseDocumentationIdentifier")
        etree.SubElement(licenseDocumentIdentifier,
                         "licenseDocumentationIdentifierType").text = licenseRow[3]
        etree.SubElement(licenseDocumentIdentifier,
                         "licenseDocumentationIdentifierValue").text = licenseRow[4]
        etree.SubElement(licenseDocumentIdentifier,
                         "licenseDocumentationRole").text = licenseRow[6]

        etree.SubElement(licenseInformation,
                         "licenseTerms").text = valueDic["licenseTerms"]

        _appendNotes(
            licenseInformation, "licenseNote",
            "SELECT licenseNote FROM RightsStatementLicenseNote WHERE fkRightsStatementLicense = %d;" % (
                licenseRow[5]))

        licenseApplicableDates = etree.SubElement(licenseInformation,
                                                  "licenseApplicableDates")
        _appendApplicableDates(licenseApplicableDates,
                               valueDic["licenseApplicableStartDate"],
                               valueDic["licenseApplicableEndDate"],
                               licenseRow[7])  # licenseApplicableEndDateOpen


def _appendOtherRightsInformation(rightsStatement, valueDic,
                                  rightsBasisActuallyOther):
    """Add one otherRightsInformation element filled from every other-rights row."""
    # A single element is created up front; all rows append into it.
    otherRightsInformation = etree.SubElement(rightsStatement,
                                              "otherRightsInformation")
    sql = """SELECT pk, otherRightsBasis, otherRightsApplicableStartDate, otherRightsApplicableEndDate, otherRightsApplicableEndDateOpen FROM RightsStatementOtherRightsInformation WHERE RightsStatementOtherRightsInformation.fkRightsStatement = %d;""" % (
        valueDic["RightsStatement.pk"])
    for otherRow in databaseInterface.queryAllSQL(sql):
        # otherRightsDocumentationIdentifier
        sql = """SELECT otherRightsDocumentationIdentifierType, otherRightsDocumentationIdentifierValue, otherRightsDocumentationIdentifierRole FROM RightsStatementOtherRightsDocumentationIdentifier WHERE fkRightsStatementotherRightsInformation = %s """ % (
            otherRow[0])
        for identifierRow in databaseInterface.queryAllSQL(sql):
            otherRightsDocumentationIdentifier = etree.SubElement(
                otherRightsInformation, "otherRightsDocumentationIdentifier")
            etree.SubElement(
                otherRightsDocumentationIdentifier,
                "otherRightsDocumentationIdentifierType").text = identifierRow[0]
            etree.SubElement(
                otherRightsDocumentationIdentifier,
                "otherRightsDocumentationIdentifierValue").text = identifierRow[1]
            etree.SubElement(
                otherRightsDocumentationIdentifier,
                "otherRightsDocumentationRole").text = identifierRow[2]

        # "Policy"/"Donor" statements, and rows without a stored basis,
        # report the statement's own rightsBasis (original note: not 100%).
        otherRightsBasis = otherRow[1]
        if not otherRightsBasis or valueDic["rightsBasis"] in rightsBasisActuallyOther:
            otherRightsBasis = valueDic["rightsBasis"]
        etree.SubElement(otherRightsInformation,
                         "otherRightsBasis").text = otherRightsBasis

        otherRightsApplicableStartDate = otherRow[2]
        otherRightsApplicableEndDate = otherRow[3]
        otherRightsApplicableEndDateOpen = otherRow[4]
        # Unlike copyright/license, the dates element is only created when a
        # start or end date exists (an open flag alone does not create it).
        if otherRightsApplicableStartDate or otherRightsApplicableEndDate:
            otherRightsApplicableDates = etree.SubElement(
                otherRightsInformation, "otherRightsApplicableDates")
            _appendApplicableDates(otherRightsApplicableDates,
                                   otherRightsApplicableStartDate,
                                   otherRightsApplicableEndDate,
                                   otherRightsApplicableEndDateOpen)

        # otherRightsNote Repeatable
        _appendNotes(
            otherRightsInformation, "otherRightsNote",
            "SELECT otherRightsNote FROM RightsStatementOtherRightsNote WHERE fkRightsStatementOtherRightsInformation = %d;" % (
                otherRow[0]))
def alreadyNormalizedManually(self, unit, CommandClassification): """ Return True if file was normalized manually, False if not. Checks by looking for access/preservation files for a give original file. Check the manualNormalization/access and manualNormalization/preservation directories for access and preservation files. If a nomalization.csv file is specified, check there first for the mapping between original file and access/preservation file. """ # Setup SIPUUID = unit.owningUnit.UUID fileUUID = unit.UUID SIPPath = unit.owningUnit.currentPath filePath = unit.currentPath bname = os.path.basename(filePath) dirName = os.path.dirname(filePath) # If normalization.csv provided, check there for mapping from original # to access/preservation file SIPPath = SIPPath.replace( "%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory", 1)) normalization_csv = os.path.join(SIPPath, "objects", "manualNormalization", "normalization.csv") if os.path.isfile(normalization_csv): found = False with open(normalization_csv, 'rb') as csv_file: reader = csv.reader(csv_file) # Search the file for an original filename that matches the one provided try: for row in reader: if "#" in row[0]: # ignore comments continue original, access, preservation = row if original.lower() == bname.lower(): found = True break except csv.Error as e: print >> sys.stderr, "Error reading {filename} on line {linenum}".format( filename=normalization_csv, linenum=reader.line_num) return False # how indicate error? 
# If we didn't find a match, let it fall through to the usual method if found: # No manually normalized file for command classification if CommandClassification == "preservation" and not preservation: return False if CommandClassification == "access" and not access: return False # If we found a match, verify access/preservation exists in DB # match and pull original location b/c sanitization if CommandClassification == "preservation": filename = preservation elif CommandClassification == "access": filename = access else: return False sql = """SELECT Files.fileUUID, Files.currentLocation FROM Files WHERE sipUUID = '{SIPUUID}' AND originalLocation LIKE '%{filename}' AND removedTime = 0;""".format(SIPUUID=SIPUUID, filename=filename) rows = databaseInterface.queryAllSQL(sql) return bool(rows) # Assume that any access/preservation file found with the right # name is the correct one bname = os.path.splitext(bname)[0] path = os.path.join(dirName, bname) if CommandClassification == "preservation": path = path.replace( "%SIPDirectory%objects/", "%SIPDirectory%objects/manualNormalization/preservation/") elif CommandClassification == "access": path = path.replace( "%SIPDirectory%objects/", "%SIPDirectory%objects/manualNormalization/access/") else: return False try: sql = """SELECT fileUUID FROM Files WHERE sipUUID = '%s' AND currentLocation LIKE '%s%%' AND removedTime = 0;""" % ( SIPUUID, path.replace("%", "\%")) ret = bool(databaseInterface.queryAllSQL(sql)) return ret except Exception as inst: print "DEBUG EXCEPTION!" traceback.print_exc(file=sys.stdout) print >> sys.stderr, type(inst), inst.args
def __init__(self, jobChainLink, pk, unit):
    """Create and run the transcoder task for command relationship ``pk``.

    A fixed set of options is built from placeholder strings, which are
    resolved through the chain link's ``passVar`` (when it is a
    ``replacementDic``) and the unit's replacement dictionaries.  For every
    CommandRelationships row matching ``pk`` a ``taskStandard`` is created,
    logged and registered in ``self.tasks``; the tasks are performed once all
    have been registered.  When no row matches, the link is completed
    immediately with ``self.exitCode``.
    """
    global outputLock
    self.tasks = {}
    self.tasksLock = threading.Lock()
    self.pk = pk
    self.jobChainLink = jobChainLink
    self.exitCode = 0
    self.clearToNextLink = False

    opts = {
        "inputFile": "%relativeLocation%",
        "fileUUID": "%fileUUID%",
        'commandClassifications': '%commandClassifications%',
        "taskUUID": "%taskUUID%",
        "objectsDirectory": "%SIPObjectsDirectory%",
        "logsDirectory": "%SIPLogsDirectory%",
        "sipUUID": "%SIPUUID%",
        "sipPath": "%SIPDirectory%",
        "fileGrpUse": "%fileGrpUse%",
        "normalizeFileGrpUse": "%normalizeFileGrpUse%",
        "excludeDirectory": "%excludeDirectory%",
        "standardErrorFile": "%standardErrorFile%",
        "standardOutputFile": "%standardOutputFile%",
    }

    SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
    passVar = self.jobChainLink.passVar
    # Resolve placeholders: passVar first, then the unit's dictionaries.
    for optsKey in list(opts):
        if isinstance(passVar, replacementDic):
            opts[optsKey] = passVar.replace(opts[optsKey])[0]

        commandReplacementDic = unit.getReplacementDic()
        for key, value in commandReplacementDic.items():
            opts[optsKey] = opts[optsKey].replace(key, value)

        for key, value in SIPReplacementDic.items():
            opts[optsKey] = opts[optsKey].replace(key, value)

    commandReplacementDic = unit.getReplacementDic()
    sql = """SELECT CommandRelationships.pk FROM CommandRelationships JOIN Commands ON CommandRelationships.command = Commands.pk WHERE CommandRelationships.pk = '%s';""" % (
        pk.__str__())
    rows = databaseInterface.queryAllSQL(sql)

    if not rows:
        self.jobChainLink.linkProcessingComplete(self.exitCode)
        return

    tasksList = []
    # "with" releases the lock even if creating or logging a task raises.
    with self.tasksLock:
        for row in rows:
            UUID = uuid.uuid4().__str__()
            # NOTE: every task receives this same opts dict object.
            opts["taskUUID"] = UUID
            opts["CommandRelationship"] = pk.__str__()
            # The converted value was previously discarded, which made the
            # deUnicode call a no-op.
            execute = deUnicode("transcoder_cr%s" % (pk))
            arguments = row.__str__()
            standardOutputFile = opts["standardOutputFile"]
            standardErrorFile = opts["standardErrorFile"]

            self.standardOutputFile = standardOutputFile
            self.standardErrorFile = standardErrorFile
            self.execute = execute
            self.arguments = arguments

            task = taskStandard(self, execute, opts, standardOutputFile,
                                standardErrorFile, outputLock=outputLock,
                                UUID=UUID)
            self.tasks[UUID] = task
            databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic,
                                                UUID, arguments)
            tasksList.append(task)

    # Tasks run only after the lock is released and all are registered.
    for task in tasksList:
        task.performTask()
def __init__(self, jobChainLink, pk, unit): self.tasks = {} self.tasksLock = threading.Lock() self.pk = pk self.jobChainLink = jobChainLink self.exitCode = 0 self.clearToNextLink = False sql = """SELECT * FROM StandardTasksConfigs where pk = '%s'""" % ( pk.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: filterFileEnd = deUnicode(row[1]) filterFileStart = deUnicode(row[2]) filterSubDir = deUnicode(row[3]) requiresOutputLock = row[4] self.standardOutputFile = deUnicode(row[5]) self.standardErrorFile = deUnicode(row[6]) self.execute = deUnicode(row[7]) self.arguments = deUnicode(row[8]) row = c.fetchone() sqlLock.release() if requiresOutputLock: outputLock = threading.Lock() else: outputLock = None SIPReplacementDic = unit.getReplacementDic(unit.currentPath) SIPUUID = unit.owningUnit.UUID sql = """SELECT variableValue FROM UnitVariables WHERE unitType = 'SIP' AND variable = 'normalizationFileIdentificationToolIdentifierTypes' AND unitUUID = '%s';""" % ( SIPUUID) rows = databaseInterface.queryAllSQL(sql) if len(rows): fileIdentificationRestriction = rows[0][0] else: fileIdentificationRestriction = None self.tasksLock.acquire() for file, fileUnit in unit.fileList.items(): #print "file:", file, fileUnit if filterFileEnd: if not file.endswith(filterFileEnd): continue if filterFileStart: if not os.path.basename(file).startswith(filterFileStart): continue if filterSubDir: #print "file", file, type(file) #print unit.pathString, type(unit.pathString) #filterSubDir = filterSubDir.encode('utf-8') #print filterSubDir, type(filterSubDir) if not file.startswith(unit.pathString + filterSubDir): print "skipping file", file, filterSubDir, " : \t Doesn't start with: ", unit.pathString + filterSubDir continue standardOutputFile = self.standardOutputFile standardErrorFile = self.standardErrorFile execute = self.execute arguments = self.arguments if self.jobChainLink.passVar != None: if isinstance(self.jobChainLink.passVar, replacementDic): 
execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace( execute, arguments, standardOutputFile, standardErrorFile) fileUUID = unit.UUID ComandClassification = self.execute #passVar=self.jobChainLink.passVar toPassVar = eval(arguments) toPassVar.update({ "%standardErrorFile%": standardErrorFile, "%standardOutputFile%": standardOutputFile, '%commandClassifications%': ComandClassification }) #print "debug", toPassVar, toPassVar['%normalizeFileGrpUse%'], unit.fileGrpUse passVar = replacementDic(toPassVar) if toPassVar[ '%normalizeFileGrpUse%'] != unit.fileGrpUse or self.alreadyNormalizedManually( unit, ComandClassification): #print "debug: ", unit.currentPath, unit.fileGrpUse self.jobChainLink.linkProcessingComplete( self.exitCode, passVar=self.jobChainLink.passVar) else: taskType = databaseInterface.queryAllSQL( "SELECT pk FROM TaskTypes WHERE description = '%s';" % ("Transcoder task type"))[0][0] if fileIdentificationRestriction: sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, CommandRelationships.command FROM FilesIdentifiedIDs JOIN FileIDs ON FilesIdentifiedIDs.fileID = FileIDs.pk JOIN FileIDTypes ON FileIDs.fileIDType = FileIDTypes.pk JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND (%s) AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE AND FileIDTypes.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % ( taskType, fileUUID, ComandClassification, fileIdentificationRestriction) else: sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, 
CommandRelationships.command FROM FilesIdentifiedIDs JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % ( taskType, fileUUID, ComandClassification) rows = databaseInterface.queryAllSQL(sql) commandsRun = {} if rows and len(rows): for row in rows: microServiceChainLink, commandRelationship, command = row if command in commandsRun: link = commandsRun[command] sql = """SELECT exitCode FROM Tasks JOIN Jobs ON Jobs.jobUUID = Tasks.jobUUID WHERE Tasks.jobUUID IN (SELECT jobUUID FROM Jobs WHERE subJobOf = '%s') AND Jobs.MicroServiceChainLinksPK = '%s';""" % ( self.jobChainLink.UUID, link) rows = databaseInterface.queryAllSQL(sql) if len(rows) != 1: print sys.stderr, "Bad query:", sql for row in rows: ret = row[0] sql = "UPDATE CommandRelationships SET countAttempts=countAttempts+1 WHERE pk='" + commandRelationship + "';" databaseInterface.runSQL(sql) if ret: column = "countNotOK" else: column = "countOK" sql = "UPDATE CommandRelationships SET " + column + "=" + column + "+1 WHERE pk='" + commandRelationship + "';" databaseInterface.runSQL(sql) else: commandsRun[command] = microServiceChainLink jobChainLink.jobChain.nextChainLink( row[0], passVar=passVar, incrementLinkSplit=True, subJobOf=self.jobChainLink.UUID) else: sql = """SELECT MicroserviceChainLink, CommandRelationships.pk, CommandRelationships.command FROM DefaultCommandsForClassifications JOIN MicroServiceChainLinks ON MicroServiceChainLinks.pk = 
DefaultCommandsForClassifications.MicroserviceChainLink JOIN TasksConfigs ON TasksConfigs.pk = MicroServiceChainLinks.currentTask JOIN CommandRelationships ON CommandRelationships.pk = TasksConfigs.taskTypePKReference JOIN CommandClassifications ON CommandClassifications.pk = DefaultCommandsForClassifications.forClassification WHERE TasksConfigs.taskType = '5e70152a-9c5b-4c17-b823-c9298c546eeb' AND CommandClassifications.classification = '%s' AND DefaultCommandsForClassifications.enabled = TRUE;""" % ( ComandClassification) rows = databaseInterface.queryAllSQL(sql) for row in rows: microServiceChainLink, commandRelationship, command = row if command in commandsRun: link = commandsRun[command] sql = """SELECT exitCode FROM Tasks JOIN Jobs ON Jobs.jobUUID = Tasks.jobUUID WHERE Tasks.jobUUID IN (SELECT jobUUID FROM Jobs WHERE subJobOf = '%s') AND Jobs.MicroServiceChainLinksPK = '%s';""" % ( self.jobChainLink.UUID, link) rows = databaseInterface.queryAllSQL(sql) if len(rows) != 1: print sys.stderr, "Bad query:", sql for row in rows: ret = row[0] sql = "UPDATE CommandRelationships SET countAttempts=countAttempts+1 WHERE pk='" + commandRelationship + "';" databaseInterface.runSQL(sql) if ret: column = "countNotOK" else: column = "countOK" sql = "UPDATE CommandRelationships SET " + column + "=" + column + "+1 WHERE pk='" + commandRelationship + "';" databaseInterface.runSQL(sql) else: jobChainLink.jobChain.nextChainLink( microServiceChainLink, passVar=passVar, incrementLinkSplit=True, subJobOf=self.jobChainLink.UUID) commandsRun[command] = microServiceChainLink self.jobChainLink.linkProcessingComplete( self.exitCode, passVar=self.jobChainLink.passVar)