def createDigiprovMDAgents():
    """Build one PREMIS agent ``digiprovMD`` element per known agent.

    Two queries are run through ``databaseInterface.querySQL``: every row of
    the ``Agents`` table, then the ``auth_user`` rows joined through
    ``Events`` and ``Files`` to the SIP whose UUID is the module-level
    ``fileGroupIdentifier``.  Each row increments ``globalDigiprovMDCounter``
    and produces a ``digiprovMD`` element whose ID is built from that counter.

    Returns:
        A list of ``digiprovMD`` elements, ``Agents`` rows first.
    """
    global globalDigiprovMDCounter
    ret = []

    def newAgentXmlData():
        # Append a new digiprovMD/mdWrap/xmlData wrapper to ret and return
        # the innermost xmlData element.
        global globalDigiprovMDCounter
        globalDigiprovMDCounter += 1
        digiprovMD = etree.Element("digiprovMD")
        digiprovMD.set("ID", "digiprovMD_" + globalDigiprovMDCounter.__str__())
        ret.append(digiprovMD)
        mdWrap = newChild(digiprovMD, "mdWrap")
        mdWrap.set("MDTYPE", "PREMIS:AGENT")
        return newChild(mdWrap, "xmlData")

    # AGENTS
    sql = (
        "SELECT agentIdentifierType, agentIdentifierValue, agentName, "
        "agentType FROM Agents;"
    )
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            newAgentXmlData().append(
                createAgent(row[0], row[1], row[2], row[3]))
            row = c.fetchone()
    finally:
        # Release even when reading a row fails, so later queries can run.
        sqlLock.release()

    sql = (
        "SELECT auth_user.id, auth_user.username, auth_user.first_name, "
        "auth_user.last_name FROM SIPs JOIN Files ON SIPs.sipUUID = "
        "Files.sipUUID JOIN Events ON Files.fileUUID = Events.fileUUID "
        "JOIN auth_user ON Events.linkingAgentIdentifier = auth_user.id "
        "WHERE SIPs.sipUUID = '%s' GROUP BY auth_user.id;"
    ) % (fileGroupIdentifier)
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            xmlData = newAgentXmlData()
            userID, username, first_name, last_name = row
            # One placeholder per value: the previous template had only two
            # "%s" for three arguments and raised TypeError.
            agentName = 'username="%s", first_name="%s", last_name="%s"' % (
                username or "", first_name or "", last_name or "")
            xmlData.append(createAgent(
                "Archivematica user pk",
                userID.__str__(),
                agentName,
                "Archivematica user"))
            row = c.fetchone()
    finally:
        sqlLock.release()
    return ret
def createDigiprovMDAgents():
    """Return ``digiprovMD`` elements describing every PREMIS agent.

    The first query reads all rows of the ``Agents`` table.  The second reads
    the ``auth_user`` rows linked, via ``Events`` and ``Files``, to the SIP
    identified by the module-level ``fileGroupIdentifier``.  For each row the
    global ``globalDigiprovMDCounter`` is incremented and used to build the
    element ID.

    Returns:
        List of ``digiprovMD`` elements in query order.
    """
    global globalDigiprovMDCounter
    ret = []

    # AGENTS
    sql = (
        "SELECT agentIdentifierType, agentIdentifierValue, agentName, "
        "agentType FROM Agents;"
    )
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            globalDigiprovMDCounter += 1
            digiprovMD = etree.Element("digiprovMD")
            digiprovMD.set(
                "ID", "digiprovMD_" + globalDigiprovMDCounter.__str__())
            ret.append(digiprovMD)
            mdWrap = newChild(digiprovMD, "mdWrap")
            mdWrap.set("MDTYPE", "PREMIS:AGENT")
            xmlData = newChild(mdWrap, "xmlData")
            xmlData.append(createAgent(row[0], row[1], row[2], row[3]))
            row = c.fetchone()
    finally:
        # The lock must be handed back even if a fetch or element build fails.
        sqlLock.release()

    sql = (
        "SELECT auth_user.id, auth_user.username, auth_user.first_name, "
        "auth_user.last_name FROM SIPs JOIN Files ON SIPs.sipUUID = "
        "Files.sipUUID JOIN Events ON Files.fileUUID = Events.fileUUID "
        "JOIN auth_user ON Events.linkingAgentIdentifier = auth_user.id "
        "WHERE SIPs.sipUUID = '%s' GROUP BY auth_user.id;"
    ) % (fileGroupIdentifier)
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            globalDigiprovMDCounter += 1
            digiprovMD = etree.Element("digiprovMD")
            digiprovMD.set(
                "ID", "digiprovMD_" + globalDigiprovMDCounter.__str__())
            ret.append(digiprovMD)
            mdWrap = newChild(digiprovMD, "mdWrap")
            mdWrap.set("MDTYPE", "PREMIS:AGENT")
            xmlData = newChild(mdWrap, "xmlData")

            userPK, username, first_name, last_name = row
            if not username:
                username = ""
            if not first_name:
                first_name = ""
            if not last_name:
                last_name = ""
            agentIdentifierType = "Archivematica user pk"
            agentIdentifierValue = userPK.__str__()
            # Three values need three placeholders; the earlier template had
            # two and failed with "not all arguments converted".
            agentName = 'username="%s", first_name="%s", last_name="%s"' % (
                username, first_name, last_name)
            agentType = "Archivematica user"
            xmlData.append(createAgent(
                agentIdentifierType, agentIdentifierValue, agentName,
                agentType))
            row = c.fetchone()
    finally:
        sqlLock.release()
    return ret
def __init__(self, currentPath, UUID=""): self.owningUnit = None self.unitType = "Transfer" #Just Use the end of the directory name self.pathString = "%transferDirectory%" currentPath2 = currentPath.replace(archivematicaMCP.config.get('MCPServer', "sharedDirectory"), \ "%sharedPath%", 1) if UUID == "": sql = """SELECT transferUUID FROM Transfers WHERE currentLocation = '""" + MySQLdb.escape_string( currentPath2) + "'" time.sleep(.5) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: UUID = row[0] print "Opening existing Transfer:", UUID, "-", currentPath2 row = c.fetchone() sqlLock.release() if UUID == "": uuidLen = -36 if archivematicaMCP.isUUID(currentPath[uuidLen - 1:-1]): UUID = currentPath[uuidLen - 1:-1] else: UUID = uuid.uuid4().__str__() self.UUID = UUID sql = """INSERT INTO Transfers (transferUUID, currentLocation) VALUES ('""" + UUID + databaseInterface.separator + MySQLdb.escape_string( currentPath2) + "');" databaseInterface.runSQL(sql) self.currentPath = currentPath2 self.UUID = UUID self.fileList = {}
def __init__(self, jobChainLink, pk, unit): """Used to get a selection, from a list of chains, to process""" self.choices = [] self.pk = pk self.jobChainLink = jobChainLink self.UUID = uuid.uuid4().__str__() self.unit = unit self.delayTimerLock = threading.Lock() self.delayTimer = None sql = """SELECT chainAvailable, description FROM MicroServiceChainChoice JOIN MicroServiceChains on chainAvailable = MicroServiceChains.pk WHERE choiceAvailableAtLink = '%s' ORDER BY MicroServiceChainChoice.pk;""" % (jobChainLink.pk.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row chainAvailable = row[0] description = row[1] self.choices.append((chainAvailable, description,)) row = c.fetchone() sqlLock.release() preConfiguredChain = self.checkForPreconfiguredXML() if preConfiguredChain != None: time.sleep(archivematicaMCP.config.getint('MCPServer', "waitOnAutoApprove")) self.jobChainLink.setExitMessage("Completed successfully") jobChain.jobChain(self.unit, preConfiguredChain) else: choicesAvailableForUnitsLock.acquire() if self.delayTimer == None: self.jobChainLink.setExitMessage('Awaiting decision') choicesAvailableForUnits[self.jobChainLink.UUID] = self choicesAvailableForUnitsLock.release()
def getDublinCore(type_, id):
    """Build a ``dublincore`` element from matching Dublincore table rows.

    Args:
        type_: Value matched against ``metadataAppliesToType``.
        id: Value matched against ``metadataAppliesToidentifier``.

    Returns:
        ``None`` when no row matches.  Otherwise an element holding, for
        every matching row, one child per Dublin Core term in column order;
        NULL columns become empty-text children.
    """
    terms = (
        "title", "creator", "subject", "description", "publisher",
        "contributor", "date", "type", "format", "identifier", "source",
        "relation", "language", "coverage", "rights",
    )
    sql = (
        "SELECT title, creator, subject, description, publisher, "
        "contributor, date, type, format, identifier, source, relation, "
        "language, coverage, rights FROM Dublincore WHERE "
        "metadataAppliesToType = '%s' AND metadataAppliesToidentifier = '%s';"
    ) % (type_.__str__(), id.__str__())
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        if row is None:
            return None
        ret = etree.Element("dublincore", nsmap={None: dctermsNS})
        ret.set(
            xsiBNS + "schemaLocation",
            dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")
        while row is not None:
            # Columns come back in the same order as the term names.
            for term, value in zip(terms, row):
                newChild(ret, term, text="" if value is None else value)
            row = c.fetchone()
    finally:
        # One release covers the early return, normal exit and errors.
        sqlLock.release()
    return ret
def findOrCreateSipInDB(path, waitSleep=dbWaitSleep): UUID = "" path = path.replace(config.get('MCPServer', "sharedDirectory"), "%sharedPath%", 1) #find UUID on end of SIP path uuidLen = -36 if isUUID(path[uuidLen - 1:-1]): UUID = path[uuidLen - 1:-1] if UUID == "": #Find it in the database databaseInterface.printSQL = True sql = """SELECT sipUUID FROM SIPs WHERE currentPath = '""" + MySQLdb.escape_string( path) + "';" #if waitSleep != 0: #time.sleep(waitSleep) #let db be updated by the microservice that moved it. c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if not row: print "Not opening existing SIP:", UUID, "-", path while row != None: UUID = row[0] print "Opening existing SIP:", UUID, "-", path row = c.fetchone() sqlLock.release() #Create it if UUID == "": UUID = databaseFunctions.createSIP(path) print "DEBUG creating sip", path, UUID return UUID
def reloadFileList(self): self.fileList = {} #os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]]) currentPath = self.currentPath.replace("%sharedPath%", \ archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1) + "/" for directory, subDirectories, files in os.walk(currentPath): directory = directory.replace( currentPath, "%SIPDirectory%", 1) for file in files: filePath = os.path.join(directory, file) #print filePath self.fileList[filePath] = unitFile(filePath) sql = """SELECT fileUUID, currentLocation FROM Files WHERE sipUUID = '""" + self.UUID + "'" #AND Files.removedTime = 0; TODO c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: #print row UUID = row[0] currentPath = row[1] if currentPath in self.fileList: self.fileList[currentPath].UUID = UUID else: print "todo: find deleted files/exclude" print row[99]#fail row = c.fetchone() self.fileList[filePath].UUID = UUID sqlLock.release()
def __init__(self, currentPath, UUID=""): self.owningUnit = None self.unitType = "Transfer" #Just Use the end of the directory name self.pathString = "%transferDirectory%" currentPath2 = currentPath.replace(archivematicaMCP.config.get('MCPServer', "sharedDirectory"), \ "%sharedPath%", 1) if UUID == "": sql = """SELECT transferUUID FROM Transfers WHERE currentLocation = '""" + MySQLdb.escape_string(currentPath2) + "'" time.sleep(.5) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: UUID = row[0] print "Opening existing Transfer:", UUID, "-", currentPath2 row = c.fetchone() sqlLock.release() if UUID == "": uuidLen = -36 if archivematicaMCP.isUUID(currentPath[uuidLen-1:-1]): UUID = currentPath[uuidLen-1:-1] else: UUID = uuid.uuid4().__str__() self.UUID = UUID sql = """INSERT INTO Transfers (transferUUID, currentLocation) VALUES ('""" + UUID + databaseInterface.separator + MySQLdb.escape_string(currentPath2) + "');" databaseInterface.runSQL(sql) self.currentPath = currentPath2 self.UUID = UUID self.fileList = {}
def __init__(self, commandID):
    """Load the command with primary key ``commandID`` from the database.

    The row's columns (after ``toStrFromUnicode``) populate ``type``,
    ``verificationCommand``, ``eventDetailCommand``, ``command``,
    ``outputLocation`` and ``description``.  Non-empty verification and
    event-detail values are replaced by ``Command`` instances built from
    them, with ``%outputLocation%`` in their command text substituted by
    this command's output location.

    NOTE(review): when no row matches, the attributes above are never set
    and the checks after the query raise AttributeError, as before.

    Args:
        commandID: Primary key in the Commands table.
    """
    self.pk = commandID
    self.stdOut = ""
    self.stdErr = ""
    self.exitCode = None
    self.failedCount = 0

    sql = (
        "SELECT CT.type, C.verificationCommand, C.eventDetailCommand, "
        "C.command, C.outputLocation, C.description FROM Commands AS C "
        "JOIN CommandTypes AS CT ON C.commandType = CT.pk WHERE C.pk = "
    ) + commandID.__str__() + " ;"
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            (self.type,
             self.verificationCommand,
             self.eventDetailCommand,
             self.command,
             self.outputLocation,
             self.description) = [toStrFromUnicode(col) for col in row]
            if isinstance(self.command, unicode):
                self.command = self.command.encode('utf-8')
            row = c.fetchone()
    finally:
        # Release the query lock even if conversion or fetching fails.
        sqlLock.release()

    if self.verificationCommand:
        self.verificationCommand = Command(self.verificationCommand)
        self.verificationCommand.command = self.verificationCommand.command.replace(
            "%outputLocation%", self.outputLocation)

    if self.eventDetailCommand:
        self.eventDetailCommand = Command(self.eventDetailCommand)
        self.eventDetailCommand.command = self.eventDetailCommand.command.replace(
            "%outputLocation%", self.outputLocation)
def updateFileLocation(src, dst, eventType, eventDateTime, eventDetail, eventIdentifierUUID=None, fileUUID="None", sipUUID=None, transferUUID=None, eventOutcomeDetailNote=""):
    """Record a move of a file from ``src`` to ``dst``.

    If the file uuid is not provided, will use the sip uuid (or transfer
    uuid) and old path to find the file uuid.  An event is inserted for the
    file and ``Files.currentLocation`` is updated to ``dst``.

    Args:
        src: Current location of the file as stored in the Files table.
        dst: New location to store.
        eventType, eventDateTime, eventDetail: Passed to ``insertIntoEvents``.
        eventIdentifierUUID: Event UUID; a fresh one is generated per call
            when omitted.
        fileUUID: File UUID, or the placeholder string "None" to look it up.
        sipUUID: SIP used for the lookup (takes precedence).
        transferUUID: Transfer used for the lookup when no SIP is given.
        eventOutcomeDetailNote: Note for the event; defaults to a description
            of the original and new names.

    Exits the process with status 6 when the file UUID cannot be determined.
    """
    # A default of uuid.uuid4() in the signature is evaluated once at
    # definition time, so every call would share one event identifier.
    if eventIdentifierUUID is None:
        eventIdentifierUUID = uuid.uuid4().__str__()

    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)

    if not fileUUID or fileUUID == "None":
        # Drop the truthy "None" placeholder so that a failed lookup is
        # caught by the check below instead of being stored as a file UUID.
        fileUUID = None
        sql = "Need to define transferUUID or sipUUID"
        if sipUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.sipUUID = '" + sipUUID + "';"
        elif transferUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.transferUUID = '" + transferUUID + "';"
        c, sqlLock = databaseInterface.querySQL(sql)
        try:
            row = c.fetchone()
            while row is not None:
                fileUUID = unicodeToStr(row[0])
                row = c.fetchone()
        finally:
            # Release the query lock even when a fetch raises.
            sqlLock.release()

    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = "Original name=\"%s\"; cleaned up name=\"%s\"" % (src, dst)

    # CREATE THE EVENT
    if not fileUUID:
        print >>sys.stderr, "Unable to find file uuid for: ", src, " -> ", dst
        exit(6)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType=eventType,
        eventDateTime=eventDateTime,
        eventDetail=eventDetail,
        eventOutcome="",
        eventOutcomeDetailNote=eventOutcomeDetailNote)

    # UPDATE THE CURRENT FILE PATH
    sql = """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(dst), fileUUID)
    databaseInterface.runSQL(sql)
def __init__(self, unit, chainPK, notifyComplete=None, passVar=None, UUID=None, subJobOf=""): print "jobChain", unit, chainPK if chainPK == None: return None self.unit = unit self.pk = chainPK self.notifyComplete = notifyComplete self.UUID = UUID self.linkSplitCount = 1 self.subJobOf = subJobOf sql = """SELECT * FROM MicroServiceChains WHERE pk = """ + chainPK.__str__() print sql c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if row == None: sqlLock.release() return None while row != None: print "jobChain", row #self.pk = row[0] self.startingChainLink = row[1] self.description = row[2] row = c.fetchone() sqlLock.release() self.currentLink = jobChainLink(self, self.startingChainLink, unit, passVar=passVar, subJobOf=subJobOf) if self.currentLink == None: return None
def process_IN_MOVED_FROM(self, event):
    """Handle a "moved from" event inside the watched transfer.

    The moved path is expressed relative to ``%transferDirectory%``, the
    Files rows of this transfer whose ``currentLocation`` starts with it are
    collected, and the move is recorded in ``movedFrom`` under the event's
    cookie together with a timer.  The timer calls ``timerExpired`` after
    the configured ``delayTimer`` so that an unclaimed move can be treated
    as moved outside of the system.

    Args:
        event: Event object providing ``path``, ``name`` and ``cookie``.
    """
    global movedFromCount
    # Wait for a moved to, and if one doesn't occur, consider it moved
    # outside of the system.
    unitPath = self.unit.currentPath.replace(
        "%sharedPath%",
        archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1)
    movedFromPath = os.path.join(event.path, event.name).replace(
        unitPath, "%transferDirectory%", 1)

    filesMoved = []
    sql = """SELECT fileUUID, currentLocation FROM Files WHERE transferUUID = '""" + self.unit.UUID + "' AND removedTime = 0 AND currentLocation LIKE '" + MySQLdb.escape_string(movedFromPath).replace("%", "\%") + "%';"
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            filesMoved.append(row)
            row = c.fetchone()
    finally:
        # Release the query lock even when a fetch raises.
        sqlLock.release()

    movedFromLock.acquire()
    try:
        utcDate = databaseInterface.getUTCDate()
        timer = threading.Timer(
            archivematicaMCP.config.getint('MCPServer', "delayTimer"),
            timerExpired, args=[event, utcDate], kwargs={})
        movedFrom[event.cookie] = (movedFromPath, filesMoved, timer)
        movedFromCount.value = movedFromCount.value + 1
    finally:
        movedFromLock.release()

    # Start the timer that checks whether the move is claimed by a move to.
    timer.start()
def getDublinCore(type_, id):
    """Return a ``dublincore`` element for the given metadata target.

    Args:
        type_: Compared with the ``metadataAppliesToType`` column.
        id: Compared with the ``metadataAppliesToidentifier`` column.

    Returns:
        ``None`` if the Dublincore table has no matching row; otherwise an
        element with one child per selected column for each matching row
        (empty text where the column is NULL).
    """
    key = [
        "title", "creator", "subject", "description", "publisher",
        "contributor", "date", "type", "format", "identifier", "source",
        "relation", "language", "coverage", "rights",
    ]
    sql = (
        "SELECT title, creator, subject, description, publisher, "
        "contributor, date, type, format, identifier, source, relation, "
        "language, coverage, rights FROM Dublincore WHERE "
        "metadataAppliesToType = '%s' AND metadataAppliesToidentifier = '%s';"
    ) % (type_.__str__(), id.__str__())
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        if row is None:
            return None
        ret = etree.Element("dublincore", nsmap={None: dctermsNS})
        ret.set(
            xsiBNS + "schemaLocation",
            dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")
        while row is not None:
            # The SELECT lists the columns in the same order as ``key``.
            for i, term in enumerate(key):
                txt = row[i] if row[i] is not None else ""
                newChild(ret, term, text=txt)
            row = c.fetchone()
    finally:
        # The lock is released on every exit path, including errors.
        sqlLock.release()
    return ret
def __init__(self, unit, chainPK, notifyComplete=None, passVar=None, UUID=None, subJobOf=""): """Create an instance of a chain from the MicroServiceChains table""" print "jobChain", unit, chainPK if chainPK == None: return None self.unit = unit self.pk = chainPK self.notifyComplete = notifyComplete self.UUID = UUID self.linkSplitCount = 1 self.subJobOf = subJobOf sql = """SELECT * FROM MicroServiceChains WHERE pk = '%s'""" % (chainPK.__str__()) print sql c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if row == None: sqlLock.release() return None while row != None: print "jobChain", row #self.pk = row[0] self.startingChainLink = row[1] self.description = row[2] row = c.fetchone() sqlLock.release() self.currentLink = jobChainLink(self, self.startingChainLink, unit, passVar=passVar, subJobOf=subJobOf) if self.currentLink == None: return None
def reloadFileList(self): self.fileList = {} #os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]]) currentPath = self.currentPath.replace("%sharedPath%", \ archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1) + "/" for directory, subDirectories, files in os.walk(currentPath): directory = directory.replace(currentPath, "%SIPDirectory%", 1) for file in files: filePath = os.path.join(directory, file) #print filePath self.fileList[filePath] = unitFile(filePath) sql = """SELECT fileUUID, currentLocation FROM Files WHERE sipUUID = '""" + self.UUID + "'" #AND Files.removedTime = 0; TODO c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: #print row UUID = row[0] currentPath = row[1] if currentPath in self.fileList: self.fileList[currentPath].UUID = UUID else: print "todo: find deleted files/exclude" print row[99] #fail row = c.fetchone() self.fileList[filePath].UUID = UUID sqlLock.release()
def reloadFileList(self):
    """Rebuild ``self.fileList`` from disk and enrich it from the Files table.

    Each file under the unit directory becomes a ``unitFile`` keyed by its
    path with the unit directory replaced by ``%SIPDirectory%``.  Files
    directly in the unit directory are keyed as ``%SIPDirectory%<name>``
    with no separator.  Every non-removed Files row of this SIP then sets
    ``UUID`` and ``fileGrpUse`` on the matching entry.  Rows with no matching
    file are reported on stderr.
    """
    self.fileList = {}
    currentPath = self.currentPath.replace(
        "%sharedPath%",
        archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1) + "/"
    for directory, subDirectories, files in os.walk(currentPath):
        directory = directory.replace(currentPath, "%SIPDirectory%", 1)
        for fileName in files:
            if directory != "%SIPDirectory%":
                filePath = os.path.join(directory, fileName)
            else:
                # The placeholder stands for a path ending in "/", so no
                # separator is added for top-level files.
                filePath = directory + fileName
            self.fileList[filePath] = unitFile(filePath, owningUnit=self)

    sql = """SELECT fileUUID, currentLocation, fileGrpUse FROM Files WHERE removedTime = 0 AND sipUUID = '""" + self.UUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            UUID = row[0]
            location = row[1]
            fileGrpUse = row[2]
            if location in self.fileList:
                self.fileList[location].UUID = UUID
                self.fileList[location].fileGrpUse = fileGrpUse
            else:
                print >>sys.stderr, self.fileList
                eventDetail = "SIP {" + self.UUID + "} has file {" + UUID + "}\"" + location + "\" in the database, but file doesn't exist in the file system."
                print >>sys.stderr, "!!!", eventDetail, "!!!"
            row = c.fetchone()
    finally:
        # Release the query lock even when a row cannot be processed.
        sqlLock.release()
def reloadFileList(self):
    """Re-scan the unit directory and load file UUIDs and group uses.

    ``self.fileList`` maps ``%SIPDirectory%``-relative paths to ``unitFile``
    objects for every file found by ``os.walk``.  The non-removed Files rows
    of this SIP then set ``UUID`` and ``fileGrpUse`` on the matching entries.
    A row without a matching file is reported on stderr and otherwise
    ignored.
    """
    self.fileList = {}
    sharedDirectory = archivematicaMCP.config.get('MCPServer', "sharedDirectory")
    currentPath = self.currentPath.replace("%sharedPath%", sharedDirectory, 1) + "/"
    for directory, subDirectories, files in os.walk(currentPath):
        directory = directory.replace(currentPath, "%SIPDirectory%", 1)
        atTopLevel = directory == "%SIPDirectory%"
        for name in files:
            # Top-level files are appended directly to the placeholder;
            # nested files are joined with a path separator.
            if atTopLevel:
                filePath = directory + name
            else:
                filePath = os.path.join(directory, name)
            self.fileList[filePath] = unitFile(filePath, owningUnit=self)

    sql = """SELECT fileUUID, currentLocation, fileGrpUse FROM Files WHERE removedTime = 0 AND sipUUID = '""" + self.UUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            UUID, location, fileGrpUse = row[0], row[1], row[2]
            entry = self.fileList.get(location)
            if location in self.fileList:
                entry.UUID = UUID
                entry.fileGrpUse = fileGrpUse
            else:
                print >>sys.stderr, self.fileList
                eventDetail = "SIP {" + self.UUID + "} has file {" + UUID + "}\"" + location + "\" in the database, but file doesn't exist in the file system."
                print >>sys.stderr, "!!!", eventDetail, "!!!"
            row = c.fetchone()
    finally:
        # Hand the query lock back even if processing a row fails.
        sqlLock.release()
def findOrCreateSipInDB(path, waitSleep=dbWaitSleep): UUID = "" path = path.replace(config.get('MCPServer', "sharedDirectory"), "%sharedPath%", 1) #find UUID on end of SIP path uuidLen = -36 if isUUID(path[uuidLen-1:-1]): UUID = path[uuidLen-1:-1] if UUID == "": #Find it in the database databaseInterface.printSQL = True sql = """SELECT sipUUID FROM SIPs WHERE currentPath = '""" + MySQLdb.escape_string(path) + "';" #if waitSleep != 0: #time.sleep(waitSleep) #let db be updated by the microservice that moved it. c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if not row: print "Not opening existing SIP:", UUID, "-", path while row != None: UUID = row[0] print "Opening existing SIP:", UUID, "-", path row = c.fetchone() sqlLock.release() #Create it if UUID == "": UUID = databaseFunctions.createSIP(path) print "DEBUG creating sip", path, UUID return UUID
def watchDirectories():
    """Start watching the watched directories defined in the WatchedDirectories table in the database.

    For every configured directory: the directory is created if missing,
    each existing entry except ``.gitignore`` is handed to
    ``createUnitAndJobChainThreaded`` (waiting while the thread limit is
    reached), and a ``watchDirectory.archivematicaWatchDirectory`` watcher is
    created with the configured poll interval.
    """
    sql = (
        "SELECT watchedDirectoryPath, chain, onlyActOnDirectories, "
        "description FROM WatchedDirectories LEFT OUTER JOIN "
        "WatchedDirectoriesExpectedTypes ON WatchedDirectories.expectedType "
        "= WatchedDirectoriesExpectedTypes.pk"
    )
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        # Read every row first so the lock is not held while processing.
        rows = list(iter(c.fetchone, None))
    finally:
        sqlLock.release()

    for row in rows:
        directory = row[0].replace(
            "%watchDirectoryPath%",
            config.get('MCPServer', "watchDirectoryPath"), 1)
        if not os.path.isdir(directory):
            os.makedirs(directory)

        for item in os.listdir(directory):
            if item == ".gitignore":
                continue
            item = item.decode("utf-8")
            path = os.path.join(unicode(directory), item)
            # Wait until a task thread slot is available.
            while limitTaskThreads <= threading.activeCount() + reservedAsTaskProcessingThreads:
                time.sleep(1)
            createUnitAndJobChainThreaded(path, row, terminate=False)

        # row[2] is onlyActOnDirectories
        actOnFiles = not row[2]
        watchDirectory.archivematicaWatchDirectory(
            directory,
            variablesAdded=row,
            callBackFunctionAdded=createUnitAndJobChainThreaded,
            alertOnFiles=actOnFiles,
            interval=config.getint('MCPServer', "watchDirectoriesPollInterval"))
def __init__(self, jobChainLink, pk, unit): self.tasks = [] self.pk = pk self.jobChainLink = jobChainLink sql = """SELECT * FROM StandardTasksConfigs where pk = '%s'""" % (pk.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row #pk = row[0] filterFileEnd = row[1] filterFileStart = row[2] filterSubDir = row[3] self.requiresOutputLock = row[4] standardOutputFile = row[5] standardErrorFile = row[6] execute = row[7] self.execute = execute arguments = row[8] row = c.fetchone() sqlLock.release() #if reloadFileList: # unit.reloadFileList() # "%taskUUID%": task.UUID.__str__(), \ if filterSubDir: directory = os.path.join(unit.currentPath, filterSubDir) else: directory = unit.currentPath if self.jobChainLink.passVar != None: if isinstance(self.jobChainLink.passVar, list): for passVar in self.jobChainLink.passVar: if isinstance(passVar, replacementDic): execute, arguments, standardOutputFile, standardErrorFile = passVar.replace(execute, arguments, standardOutputFile, standardErrorFile) elif isinstance(self.jobChainLink.passVar, replacementDic): execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(execute, arguments, standardOutputFile, standardErrorFile) commandReplacementDic = unit.getReplacementDic(directory) #for each key replace all instances of the key in the command string for key in commandReplacementDic.iterkeys(): value = commandReplacementDic[key].replace("\"", ("\\\"")) if execute: execute = execute.replace(key, value) if arguments: arguments = arguments.replace(key, value) if standardOutputFile: standardOutputFile = standardOutputFile.replace(key, value) if standardErrorFile: standardErrorFile = standardErrorFile.replace(key, value) UUID = uuid.uuid4().__str__() self.task = taskStandard(self, execute, arguments, standardOutputFile, standardErrorFile, UUID=UUID) databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic, UUID, arguments) t = 
threading.Thread(target=self.task.performTask) t.daemon = True t.start()
def checkForPreconfiguredXML(self): ret = None xmlFilePath = os.path.join( \ self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \ archivematicaMCP.config.get('MCPServer', "processingXMLFile") \ ) xmlFilePath = unicodeToStr(xmlFilePath) if os.path.isfile(xmlFilePath): # For a list of items with pks: # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc; try: command = "sudo chmod 774 \"" + xmlFilePath + "\"" if isinstance(command, unicode): command = command.encode("utf-8") exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False) tree = etree.parse(xmlFilePath) root = tree.getroot() for preconfiguredChoice in root.find("preconfiguredChoices"): #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk: if preconfiguredChoice.find("appliesTo").text == self.jobChainLink.description: desiredChoice = preconfiguredChoice.find("goToChain").text sql = """SELECT MicroServiceChains.pk FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk WHERE MicroServiceChains.description = '%s' AND MicroServiceChainChoice.choiceAvailableAtLink = %s;""" % (desiredChoice, self.jobChainLink.pk.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: ret = row[0] row = c.fetchone() sqlLock.release() try: #<delay unitAtime="yes">30</delay> delayXML = preconfiguredChoice.find("delay") unitAtimeXML = delayXML.get("unitCtime") if unitAtimeXML != None and unitAtimeXML.lower() != "no": 
delaySeconds=int(delayXML.text) unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \ archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1)) nowTime=time.time() timeDifference = nowTime - unitTime timeToGo = delaySeconds - timeDifference print "time to go:", timeToGo #print "that will be: ", (nowTime + timeToGo) self.jobChainLink.setExitMessage("Waiting till: " + datetime.datetime.fromtimestamp((nowTime + timeToGo)).ctime()) t = threading.Timer(timeToGo, self.proceedWithChoice, args=[ret], kwargs={"delayTimerStart":True}) t.daemon = True self.delayTimer = t t.start() return None except Exception as inst: print >>sys.stderr, "Error parsing xml:" print >>sys.stderr, type(inst) print >>sys.stderr, inst.args except Exception as inst: print >>sys.stderr, "Error parsing xml:" print >>sys.stderr, type(inst) print >>sys.stderr, inst.args return ret
def __init__(self, jobChainLink, pk, unit): self.tasks = [] self.pk = pk self.jobChainLink = jobChainLink sql = """SELECT * FROM StandardTasksConfigs where pk = """ + pk.__str__() c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row #pk = row[0] filterFileEnd = row[1] filterFileStart = row[2] filterSubDir = row[3] self.requiresOutputLock = row[4] standardOutputFile = row[5] standardErrorFile = row[6] execute = row[7] self.execute = execute arguments = row[8] row = c.fetchone() sqlLock.release() #if reloadFileList: # unit.reloadFileList() # "%taskUUID%": task.UUID.__str__(), \ if filterSubDir: directory = os.path.join(unit.currentPath, filterSubDir) else: directory = unit.currentPath if self.jobChainLink.passVar != None: if isinstance(self.jobChainLink.passVar, list): for passVar in self.jobChainLink.passVar: if isinstance(passVar, replacementDic): execute, arguments, standardOutputFile, standardErrorFile = passVar.replace(execute, arguments, standardOutputFile, standardErrorFile) elif isinstance(self.jobChainLink.passVar, replacementDic): execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(execute, arguments, standardOutputFile, standardErrorFile) commandReplacementDic = unit.getReplacementDic(directory) #for each key replace all instances of the key in the command string for key in commandReplacementDic.iterkeys(): value = commandReplacementDic[key].replace("\"", ("\\\"")) if execute: execute = execute.replace(key, value) if arguments: arguments = arguments.replace(key, value) if standardOutputFile: standardOutputFile = standardOutputFile.replace(key, value) if standardErrorFile: standardErrorFile = standardErrorFile.replace(key, value) UUID = uuid.uuid4().__str__() self.task = taskStandard(self, execute, arguments, standardOutputFile, standardErrorFile, UUID=UUID) databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic, UUID, arguments) t = 
threading.Thread(target=self.task.performTask) t.daemon = True t.start()
def checkForPreconfiguredXML(self): ret = None xmlFilePath = os.path.join( \ self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1) + "/", \ archivematicaMCP.config.get('MCPServer', "processingXMLFile") \ ) if os.path.isfile(xmlFilePath): # For a list of items with pks: # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc; try: tree = etree.parse(xmlFilePath) root = tree.getroot() for preconfiguredChoice in root.find("preconfiguredChoices"): #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk: if preconfiguredChoice.find("appliesTo").text == self.jobChainLink.description: desiredChoice = preconfiguredChoice.find("goToChain").text sql = """SELECT MicroServiceChoiceReplacementDic.replacementDic FROM MicroServiceChoiceReplacementDic WHERE MicroServiceChoiceReplacementDic.description = '%s' AND MicroServiceChoiceReplacementDic.choiceAvailableAtLink = '%s';""" % (desiredChoice, self.jobChainLink.pk.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: ret = row[0] row = c.fetchone() sqlLock.release() try: #<delay unitAtime="yes">30</delay> delayXML = preconfiguredChoice.find("delay") unitAtimeXML = delayXML.get("unitCtime") if unitAtimeXML != None and unitAtimeXML.lower() != "no": delaySeconds=int(delayXML.text) unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \ archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1)) nowTime=time.time() timeDifference = nowTime - unitTime timeToGo = delaySeconds - timeDifference print "time 
to go:", timeToGo #print "that will be: ", (nowTime + timeToGo) self.jobChainLink.setExitMessage("Waiting till: " + datetime.datetime.fromtimestamp((nowTime + timeToGo)).ctime()) rd = replacementDic(eval(ret)) if self.jobChainLink.passVar != None: if isinstance(self.jobChainLink.passVar, replacementDic): new = {} new.update(self.jobChainLink.passVar.dic) new.update(rd.dic) rd.dic = new t = threading.Timer(timeToGo, self.jobChainLink.linkProcessingComplete, args=[0, rd], kwargs={}) t.daemon = True t.start() t2 = threading.Timer(timeToGo, self.jobChainLink.setExitMessage, args=["Completed successfully"], kwargs={}) t2.start() return waitingOnTimer except Exception as inst: print >>sys.stderr, "Error parsing xml for pre-configured choice" except Exception as inst: print >>sys.stderr, "Error parsing xml for pre-configured choice" return ret
def getNextChainLinkPK(self, exitCode):
    """Return the pk of the chain link that follows this one for ``exitCode``.

    Looks up MicroServiceChainLinksExitCodes for this link's pk and the given
    exit code; when no row matches, ``self.defaultNextChainLink`` is returned.

    Returns None (without querying) when ``exitCode`` is None.
    """
    if exitCode is None:
        return None
    ret = self.defaultNextChainLink
    sql = "SELECT nextMicroServiceChainLink FROM MicroServiceChainLinksExitCodes WHERE microServiceChainLink = '%s' AND exitCode = %s" % (
        str(self.pk), str(exitCode))
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        if row is not None:
            ret = row[0]
    finally:
        # Release even when the fetch raises so the lock is not left held.
        sqlLock.release()
    return ret
def getSoundFileToPlay(self, exitCode):
    """Return the sound file location configured for ``exitCode`` on this link.

    Reads Sounds.fileLocation through MicroServiceChainLinksExitCodes for
    this link's pk and the given exit code; when no row matches,
    ``self.defaultSoundFile`` is returned. Because of the LEFT OUTER JOIN a
    matching row may carry no sound, in which case its first column is
    returned as-is.

    Returns None (without querying) when ``exitCode`` is None.
    """
    if exitCode is None:
        return None
    ret = self.defaultSoundFile
    sql = "SELECT Sounds.fileLocation FROM MicroServiceChainLinksExitCodes LEFT OUTER JOIN Sounds ON MicroServiceChainLinksExitCodes.playSound = Sounds.pk WHERE microServiceChainLink = '%s' AND exitCode = %s" % (
        str(self.pk), str(exitCode))
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        if row is not None:
            ret = row[0]
    finally:
        # Release even when the fetch raises so the lock is not left held.
        sqlLock.release()
    return ret
# something(): for every file found under accessDirectory, derive the matching
# path under objectsDirectory, cut the name after its last ".", and select the
# Files rows of SIPUUID (removedTime = 0) whose currentLocation starts with
# that path (SQL LIKE '<path>%').
#   - When the query returns no row, a message is printed to stderr and
#     exitCode becomes 1.
#   - Rows whose remaining suffix still contains a "." are skipped.
#   - For the other rows the access file is moved with renameAsSudo to
#     DIPDirectory/objects/<fileUUID>-<access file basename>, unless copy is
#     true (that branch only prints a TODO message).
#   - After the SQL lock is released, each move is recorded with
#     updateFileLocation as a "movement" event.
# Returns exitCode: 179 unless some lookup returned no row.
# NOTE(review): the original indentation is lost on this line, so how much of
# the body is nested under `if objectNameExtensionIndex != -1:` cannot be
# determined from here - confirm against the formatted source before editing.
# NOTE(review): sqlLock is released without try/finally; an exception raised
# while the rows are processed would leave the lock held.
def something(SIPDirectory, accessDirectory, objectsDirectory, DIPDirectory, SIPUUID, date, copy=False): #exitCode = 435 exitCode = 179 print SIPDirectory #For every file, & directory Try to find the matching file & directory in the objects directory for (path, dirs, files) in os.walk(accessDirectory): for file in files: accessPath = os.path.join(path, file) objectPath = accessPath.replace(accessDirectory, objectsDirectory, 1) objectName = os.path.basename(objectPath) objectNameExtensionIndex = objectName.rfind(".") if objectNameExtensionIndex != -1: objectName = objectName[:objectNameExtensionIndex + 1] objectNameLike = os.path.join( os.path.dirname(objectPath), objectName).replace(SIPDirectory, "%SIPDirectory%", 1) #sql = "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation LIKE '%s%' AND removedTime = 0 AND SIPUUID = '%s'" % (objectNameLike, SIPUUID) #ValueError: unsupported format character ''' (0x27) at index 76 sql = "SELECT fileUUID, currentLocation FROM Files WHERE currentLocation LIKE '" + objectNameLike + "%' AND removedTime = 0 AND SIPUUID = '"+ SIPUUID + "'" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if not row: print >>sys.stderr, "No corresponding object for:", accessPath.replace(SIPDirectory, "%SIPDirectory%", 1) exitCode = 1 update = [] while row != None: objectUUID = row[0] objectPath = row[1] objectExtension = objectPath.replace(objectNameLike, "", 1) print objectName[objectNameExtensionIndex + 1:], objectExtension, "\t", if objectExtension.find(".") != -1: print row = c.fetchone() continue print objectName[objectNameExtensionIndex + 1:], objectExtension, "\t", print row dipPath = os.path.join(DIPDirectory, "objects", "%s-%s" % (objectUUID, os.path.basename(accessPath))) if copy: print "TODO - copy not supported yet" else: # dest = dipPath renameAsSudo(accessPath, dest) src = accessPath.replace(SIPDirectory, "%SIPDirectory%") dst = dest.replace(SIPDirectory, "%SIPDirectory%") update.append((src, dst)) # row = 
c.fetchone() sqlLock.release() for src, dst in update: eventDetail = "" eventOutcomeDetailNote = "moved from=\"" + src + "\"; moved to=\"" + dst + "\"" updateFileLocation(src, dst, "movement", date, eventDetail, sipUUID=SIPUUID, eventOutcomeDetailNote = eventOutcomeDetailNote) return exitCode
def reload(self): sql = """SELECT * FROM SIPs WHERE sipUUID = '""" + self.UUID + "'" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row #self.UUID = row[0] self.createdTime = deUnicode(row[1]) self.currentPath = deUnicode(row[2]) row = c.fetchone() sqlLock.release()
def getMagicLink(self): ret = None sql = """SELECT magicLink, magicLinkExitMessage FROM Transfers WHERE transferUUID = '""" + self.UUID + "'" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row ret = row row = c.fetchone() sqlLock.release() return ret
def getSoundFileToPlay(self, exitCode):
    """Return the sound file location configured for ``exitCode`` on this link.

    Reads Sounds.fileLocation through MicroServiceChainLinksExitCodes for
    this link's pk and the given exit code; when no row matches,
    ``self.defaultSoundFile`` is returned. Because of the LEFT OUTER JOIN a
    matching row may carry no sound, in which case its first column is
    returned as-is.

    Returns None (without querying) when ``exitCode`` is None.
    """
    if exitCode is None:
        return None
    ret = self.defaultSoundFile
    sql = "SELECT Sounds.fileLocation FROM MicroServiceChainLinksExitCodes LEFT OUTER JOIN Sounds ON MicroServiceChainLinksExitCodes.playSound = Sounds.pk WHERE microServiceChainLink = '%s' AND exitCode = %s" % (
        str(self.pk), str(exitCode))
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        if row is not None:
            ret = row[0]
    finally:
        # Release even when the fetch raises so the lock is not left held.
        sqlLock.release()
    return ret
def getNextChainLinkPK(self, exitCode):
    """Return the pk of the chain link that follows this one for ``exitCode``.

    Looks up MicroServiceChainLinksExitCodes for this link's pk and the given
    exit code; when no row matches, ``self.defaultNextChainLink`` is returned.

    Returns None (without querying) when ``exitCode`` is None.
    """
    if exitCode is None:
        return None
    ret = self.defaultNextChainLink
    sql = "SELECT nextMicroServiceChainLink FROM MicroServiceChainLinksExitCodes WHERE microServiceChainLink = '%s' AND exitCode = %s" % (
        str(self.pk), str(exitCode))
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        if row is not None:
            ret = row[0]
    finally:
        # Release even when the fetch raises so the lock is not left held.
        sqlLock.release()
    return ret
def reload(self):
    """Refresh ``UUID`` and ``currentPath`` from this unit's Transfers row.

    Selects transferUUID and currentLocation from Transfers where
    transferUUID equals ``self.UUID`` and stores both values (passed through
    ``deUnicode``) on the instance; the last row fetched wins.
    """
    sql = """SELECT transferUUID, currentLocation FROM Transfers WHERE transferUUID = '""" + self.UUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            self.UUID = deUnicode(row[0])
            #self.createdTime = row[1]
            self.currentPath = deUnicode(row[1])
            row = c.fetchone()
    finally:
        # Release even when fetching raises so the lock is not left held.
        sqlLock.release()
    return
def getMagicLink(self): ret = None sql = """SELECT magicLink, magicLinkExitMessage FROM SIPs WHERE sipUUID = '""" + self.UUID + "'" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row ret = row row = c.fetchone() sqlLock.release() return ret
def __init__(self, jobChainLink, pk, unit): self.choices = [] self.pk = pk self.jobChainLink = jobChainLink self.UUID = uuid.uuid4().__str__() self.unit = unit sql = """SELECT replacementDic, description FROM MicroServiceChoiceReplacementDic WHERE choiceAvailableAtLink = '%s'""" % ( jobChainLink.pk.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() choiceIndex = 0 while row != None: print row replacementDic_ = row[0] description_ = row[1] self.choices.append((choiceIndex, description_, replacementDic_)) row = c.fetchone() choiceIndex += 1 sqlLock.release() #print "choices", self.choices preConfiguredChain = self.checkForPreconfiguredXML() if preConfiguredChain != None: if preConfiguredChain != waitingOnTimer: #time.sleep(archivematicaMCP.config.getint('MCPServer', "waitOnAutoApprove")) #print "checking for xml file for processing rules. TODO" self.jobChainLink.setExitMessage("Completed successfully") #jobChain.jobChain(self.unit, preConfiguredChain) rd = replacementDic(eval(preConfiguredChain)) if self.jobChainLink.passVar != None: if isinstance(self.jobChainLink.passVar, list): found = False for passVar in self.jobChainLink.passVar: if isinstance(self.jobChainLink.passVar, replacementDic): new = {} new.update(self.jobChainLink.passVar.dic) new.update(rd.dic) rd.dic = [new] found = True break if not found: self.jobChainLink.passVar.append(rd) rd = self.jobChainLink.passVar else: rd = [rd] self.jobChainLink.linkProcessingComplete(0, rd) else: print "waiting on delay to resume processing on unit:", unit else: choicesAvailableForUnitsLock.acquire() self.jobChainLink.setExitMessage('Awaiting decision') choicesAvailableForUnits[self.jobChainLink.UUID] = self choicesAvailableForUnitsLock.release()
def __init__(self, jobChainLink, pk, unit): self.choices = [] self.pk = pk self.jobChainLink = jobChainLink self.UUID = uuid.uuid4().__str__() self.unit = unit sql = """SELECT replacementDic, description FROM MicroServiceChoiceReplacementDic WHERE choiceAvailableAtLink = """ + jobChainLink.pk.__str__() c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() choiceIndex = 0 while row != None: print row replacementDic_ = row[0] description_ = row[1] self.choices.append((choiceIndex, description_, replacementDic_)) row = c.fetchone() choiceIndex += 1 sqlLock.release() #print "choices", self.choices preConfiguredChain = self.checkForPreconfiguredXML() if preConfiguredChain != None: if preConfiguredChain != waitingOnTimer: #time.sleep(archivematicaMCP.config.getint('MCPServer', "waitOnAutoApprove")) #print "checking for xml file for processing rules. TODO" self.jobChainLink.setExitMessage("Completed successfully") #jobChain.jobChain(self.unit, preConfiguredChain) rd = replacementDic(eval(preConfiguredChain)) if self.jobChainLink.passVar != None: if isinstance(self.jobChainLink.passVar, list): found = False for passVar in self.jobChainLink.passVar: if isinstance(self.jobChainLink.passVar, replacementDic): new = {} new.update(self.jobChainLink.passVar.dic) new.update(rd.dic) rd.dic = [new] found = True break if not found: self.jobChainLink.passVar.append(rd) rd = self.jobChainLink.passVar else: rd = [rd] self.jobChainLink.linkProcessingComplete(0, rd) else: print "waiting on delay to resume processing on unit:", unit else: choicesAvailableForUnitsLock.acquire() self.jobChainLink.setExitMessage('Awaiting decision') choicesAvailableForUnits[self.jobChainLink.UUID] = self choicesAvailableForUnitsLock.release()
def getMagicLink(self): """Load a link from the unit to process. Deprecated! Replaced with Set/Load Unit Variable""" ret = None sql = """SELECT magicLink, magicLinkExitMessage FROM Transfers WHERE transferUUID = '""" + self.UUID + "'" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row ret = row row = c.fetchone() sqlLock.release() return ret
def updateExitMessage(self, exitCode): ret = self.defaultExitMessage if exitCode != None: sql = "SELECT exitMessage FROM MicroServiceChainLinksExitCodes WHERE microServiceChainLink = '%s' AND exitCode = %s" % (self.pk.__str__(), exitCode.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if row != None: ret = row[0] sqlLock.release() if ret != None: self.setExitMessage(ret) else: print "No exit message"
def main(sipUUID, transfersMetadataDirectory, transfersLogsDirectory, sharedPath=""): if not os.path.exists(transfersMetadataDirectory): os.makedirs(transfersMetadataDirectory) if not os.path.exists(transfersLogsDirectory): os.makedirs(transfersLogsDirectory) exitCode = 0 sql = """SELECT Files.transferUUID, Transfers.currentLocation FROM Files JOIN Transfers on Transfers.transferUUID = Files.transferUUID WHERE sipUUID = '%s' GROUP BY Files.transferUUID;""" % (sipUUID) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: try: transferUUID = row[0] transferPath = row[1] if sharedPath != "": transferPath = transferPath.replace("%sharedPath%", sharedPath, 1) transferBasename = os.path.basename(os.path.abspath(transferPath)) transferMetaDestDir = os.path.join(transfersMetadataDirectory, transferBasename) transfersLogsDestDir = os.path.join(transfersLogsDirectory, transferBasename) if not os.path.exists(transferMetaDestDir): os.makedirs(transferMetaDestDir) transferMetadataDirectory = os.path.join(transferPath, "metadata") for met in os.listdir(transferMetadataDirectory): if met == "submissionDocumentation": continue item = os.path.join(transferMetadataDirectory, met) if os.path.isdir(item): shutil.copytree(item, os.path.join(transferMetaDestDir, met)) else: shutil.copy(item, os.path.join(transferMetaDestDir, met)) print "copied: ", transferPath + "metadata", " -> ", os.path.join(transferMetaDestDir, "metadata") if not os.path.exists(transfersLogsDestDir): os.makedirs(transfersLogsDestDir) shutil.copytree(transferPath + "logs", os.path.join(transfersLogsDestDir, "logs")) print "copied: ", transferPath + "logs", " -> ", os.path.join(transfersLogsDestDir, "logs") except Exception as inst: print >>sys.stderr, type(inst) print >>sys.stderr, inst.args traceback.print_exc(file=sys.stderr) print >>sys.stderr, "Error with transfer: ", row exitCode += 1 row = c.fetchone() sqlLock.release() exit(exitCode)
def createDigiprovMD(fileUUID):
    """Build one ``digiprovMD`` element per event recorded for a file.

    Each element wraps a PREMIS ``event`` built from a row of the Events
    table for ``fileUUID`` and gets an ID taken from the module-level
    ``globalDigiprovMDCounter``. Every event is linked to the event's own
    user (when the row has one) and to every row of the Agents table.

    Returns:
        A list of ``digiprovMD`` elements, in the order the events were
        returned by the database.
    """
    global globalDigiprovMDCounter
    ret = []
    #EVENTS
    sql = "SELECT pk, fileUUID, eventIdentifierUUID, eventType, eventDateTime, eventDetail, eventOutcome, eventOutcomeDetailNote, linkingAgentIdentifier FROM Events WHERE fileUUID = '" + fileUUID + "';"
    rows = databaseInterface.queryAllSQL(sql)

    # The Agents query does not depend on the event, so it is run once here
    # instead of once per event.
    agents = []
    agentsSql = """SELECT agentIdentifierType, agentIdentifierValue, agentName, agentType FROM Agents;"""
    c, sqlLock = databaseInterface.querySQL(agentsSql)
    try:
        agentRow = c.fetchone()
        while agentRow is not None:
            agents.append((agentRow[0], agentRow[1]))
            agentRow = c.fetchone()
    finally:
        # Release even when fetching raises so the lock is not left held.
        sqlLock.release()

    for row in rows:
        digiprovMD = etree.Element("digiprovMD")
        ret.append(digiprovMD) #newChild(amdSec, "digiprovMD")
        #digiprovMD.set("ID", "digiprov-"+ os.path.basename(filename) + "-" + fileUUID)
        globalDigiprovMDCounter += 1
        digiprovMD.set("ID", "digiprovMD_"+ globalDigiprovMDCounter.__str__())

        mdWrap = newChild(digiprovMD,"mdWrap")
        mdWrap.set("MDTYPE", "PREMIS:EVENT")
        xmlData = newChild(mdWrap,"xmlData")
        event = etree.SubElement(xmlData, "event", nsmap={None: premisNS})
        event.set(xsiBNS+"schemaLocation", premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
        event.set("version", "2.2")

        eventIdentifier = etree.SubElement(event, "eventIdentifier")
        etree.SubElement(eventIdentifier, "eventIdentifierType").text = "UUID"
        etree.SubElement(eventIdentifier, "eventIdentifierValue").text = row[2]

        etree.SubElement(event, "eventType").text = row[3]
        etree.SubElement(event, "eventDateTime").text = row[4].__str__().replace(" ", "T")
        etree.SubElement(event, "eventDetail").text = escape(row[5])

        eventOutcomeInformation = etree.SubElement(event, "eventOutcomeInformation")
        etree.SubElement(eventOutcomeInformation, "eventOutcome").text = row[6]
        eventOutcomeDetail = etree.SubElement(eventOutcomeInformation, "eventOutcomeDetail")
        etree.SubElement(eventOutcomeDetail, "eventOutcomeDetailNote").text = escape(row[7])

        if row[8]:
            linkingAgentIdentifier = etree.SubElement(event, "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierType").text = "Archivematica user pk"
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierValue").text = row[8].__str__()

        #linkingAgentIdentifier
        for agentIdentifierType, agentIdentifierValue in agents:
            linkingAgentIdentifier = etree.SubElement(event, "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierType").text = agentIdentifierType
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierValue").text = agentIdentifierValue
    return ret
def get_pairs(dip_uuid):
    """Return the resource pairings stored for a DIP, keyed by file UUID.

    Args:
        dip_uuid: value matched against AtkDIPObjectResourcePairing.dipUUID.

    Returns:
        dict mapping each fileUUID to ``{'rid': resourceId,
        'rcid': resourceComponentId}``.
    """
    pairs = dict()
    #connect to archivematica db, make a set of pairs from pairs table
    sql = """SELECT fileUUID, resourceId, resourceComponentId from AtkDIPObjectResourcePairing where dipUUID = %s"""
    c, sqlLock = databaseInterface.querySQL(sql, (dip_uuid,))
    try:
        for fileUUID, resourceId, resourceComponentId in c.fetchall():
            pairs[fileUUID] = {'rid': resourceId, 'rcid': resourceComponentId}
    finally:
        # Release even when fetching raises so the lock is not left held.
        sqlLock.release()
    return pairs
def updateExitMessage(self, exitCode): ret = self.defaultExitMessage if exitCode != None: sql = "SELECT exitMessage FROM MicroServiceChainLinksExitCodes WHERE microServiceChainLink = '%s' AND exitCode = %s" % ( self.pk.__str__(), exitCode.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if row != None: ret = row[0] sqlLock.release() if ret != None: self.setExitMessage(ret) else: print "No exit message"
def get_pairs(dip_uuid):
    """Return the resource pairings stored for a DIP, keyed by file UUID.

    Args:
        dip_uuid: value matched against AtkDIPObjectResourcePairing.dipUUID.

    Returns:
        dict mapping each fileUUID to ``{'rid': resourceId,
        'rcid': resourceComponentId}``.
    """
    pairs = dict()
    #connect to archivematica db, make a set of pairs from pairs table
    # dip_uuid is passed as a query parameter instead of being formatted into
    # the statement, so quoting is left to the database driver.
    sql = """SELECT fileUUID, resourceId, resourceComponentId from AtkDIPObjectResourcePairing where dipUUID = %s"""
    logger.debug("%s; args=%r", sql, (dip_uuid,))
    c, sqlLock = databaseInterface.querySQL(sql, (dip_uuid,))
    try:
        for fileUUID, resourceId, resourceComponentId in c.fetchall():
            pairs[fileUUID] = {'rid': resourceId, 'rcid': resourceComponentId}
    finally:
        # Release even when fetching raises so the lock is not left held.
        sqlLock.release()
    return pairs
def cleanWatchedDirectories(): sql = """SELECT watchedDirectoryPath FROM WatchedDirectories;""" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: try: directory = row[0].replace( "%watchDirectoryPath%", "/var/archivematica/sharedDirectory/watchedDirectories/", 1 ) removeEverythingInDirectory(directory) except Exception as inst: print "debug except 2" print type(inst) # the exception instance print inst.args # arguments stored in .args row = c.fetchone() sqlLock.release()
def cleanWatchedDirectories(): sql = """SELECT watchedDirectoryPath FROM WatchedDirectories;""" c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: try: directory = row[0].replace( "%watchDirectoryPath%", "/var/archivematica/sharedDirectory/watchedDirectories/", 1) removeEverythingInDirectory(directory) except Exception as inst: print "debug except 2" print type(inst) # the exception instance print inst.args # arguments stored in .args row = c.fetchone() sqlLock.release()
def process_IN_DELETE(self, event):
    """Record as removed every tracked file at or under a deleted path.

    The deleted path (``event.path`` joined with ``event.name``) is rewritten
    relative to "%transferDirectory%". Every file of this transfer that is
    not yet removed and whose currentLocation starts with that path is passed
    to ``fileWasRemoved``.
    """
    #print event
    #print "Transfer IN_DELETE"
    #Wait for a moved to, and if one doesn't occur, consider it moved outside of the system.
    unitPath = self.unit.currentPath.replace(
        "%sharedPath%",
        archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1)
    movedFromPath = os.path.join(event.path, event.name).replace(
        unitPath, "%transferDirectory%", 1)
    # Escape the LIKE wildcards so the path only matches itself as a literal
    # prefix; "_" was previously left unescaped and matched any character.
    likePrefix = MySQLdb.escape_string(movedFromPath).replace("%", "\\%").replace("_", "\\_")
    sql = """SELECT fileUUID, currentLocation FROM Files WHERE transferUUID = '""" + self.unit.UUID + "' AND removedTime = 0 AND currentLocation LIKE '" + likePrefix + "%';"
    filesMoved = []
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            filesMoved.append(row)
            row = c.fetchone()
    finally:
        # Release even when fetching raises so the lock is not left held.
        sqlLock.release()
    # The lock is released before fileWasRemoved runs, as in the original.
    for fileUUID, currentLocation in filesMoved:
        fileWasRemoved(fileUUID, eventOutcomeDetailNote = "removed from: " + currentLocation)
def reloadFileList(self): """Match files to their UUID's via their location and the File table's currentLocation""" self.fileList = {} #os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]]) currentPath = self.currentPath.replace("%sharedPath%", \ archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1) + "/" #print "currentPath: ", currentPath, type(currentPath) try: #print currentPath, type(currentPath) for directory, subDirectories, files in os.walk(currentPath): directory = directory.replace(currentPath, "%transferDirectory%", 1) for file in files: if "%transferDirectory%" != directory: filePath = os.path.join(directory, file) else: filePath = directory + file self.fileList[filePath] = unitFile(filePath, owningUnit=self) sql = """SELECT fileUUID, currentLocation, fileGrpUse FROM Files WHERE removedTime = 0 AND transferUUID = '""" + self.UUID + "'" #print sql c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() #print self.fileList while row != None: #print row UUID = row[0] currentPath = row[1].encode("utf-8") fileGrpUse = row[2] #print currentPath in self.fileList, row if currentPath in self.fileList: self.fileList[currentPath].UUID = UUID self.fileList[currentPath].fileGrpUse = fileGrpUse else: print >> sys.stderr, "!!!", "Transfer {" + self.UUID + "} has file {" + UUID + "}\"", currentPath, "\" in the database, but file doesn't exist in the file system.", "!!!" row = c.fetchone() sqlLock.release() except Exception as inst: traceback.print_exc(file=sys.stdout) print type(inst) print inst.args exit(1)
def __init__(self, jobChain, jobChainLinkPK, unit, passVar=None, subJobOf=""): if jobChainLinkPK == None: return None self.UUID = uuid.uuid4().__str__() self.jobChain = jobChain self.pk = jobChainLinkPK self.unit = unit self.passVar = passVar self.createdDate = databaseInterface.getUTCDate() self.subJobOf = subJobOf sql = """SELECT MicroServiceChainLinks.currentTask, MicroServiceChainLinks.defaultNextChainLink, TasksConfigs.taskType, TasksConfigs.taskTypePKReference, TasksConfigs.description, MicroServiceChainLinks.reloadFileList, Sounds.fileLocation, MicroServiceChainLinks.defaultExitMessage, MicroServiceChainLinks.microserviceGroup FROM MicroServiceChainLinks LEFT OUTER JOIN Sounds ON MicroServiceChainLinks.defaultPlaySound = Sounds.pk JOIN TasksConfigs on MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE MicroServiceChainLinks.pk = '%s'""" % ( jobChainLinkPK.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() if row == None: sqlLock.release() return None while row != None: print row self.currentTask = row[0] self.defaultNextChainLink = row[1] taskType = row[2] taskTypePKReference = row[3] self.description = row[4] self.reloadFileList = row[5] self.defaultSoundFile = row[6] self.defaultExitMessage = row[7] self.microserviceGroup = row[8] row = c.fetchone() sqlLock.release() print "<<<<<<<<< ", self.description, " >>>>>>>>>" self.unit.reload() logJobCreatedSQL(self) if self.createTasks(taskType, taskTypePKReference) == None: self.getNextChainLinkPK(None)
def __init__(self, commandID, replacementDic, onSuccess=None, opts=None):
    """Load a command definition from the Commands table.

    The row for ``commandID`` is converted column by column with
    ``toStrFromUnicode`` and stored on the instance. When the row names a
    verification command or an event-detail command, each is loaded as a
    nested ``Command`` whose "%outputLocation%" placeholder is replaced with
    this command's output location.

    Args:
        commandID: pk of the Commands row.
        replacementDic: stored on the instance and passed to nested commands.
        onSuccess: stored as ``self.onSuccess``.
        opts: stored as ``self.opts``.
    """
    self.pk = commandID
    self.replacementDic = replacementDic
    self.onSuccess = onSuccess
    self.stdOut = ""
    self.stdErr = ""
    self.exitCode = None
    self.failedCount = 0
    self.opts = opts
    sql = """SELECT CT.type, C.verificationCommand, C.eventDetailCommand, C.command, C.outputLocation, C.description, C.outputFileFormat FROM Commands AS C JOIN CommandTypes AS CT ON C.commandType = CT.pk WHERE C.pk = '""" + commandID.__str__() + """' ;"""
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            rowSTR = [toStrFromUnicode(column) for column in row]
            (self.type,
             self.verificationCommand,
             self.eventDetailCommand,
             self.command,
             self.outputLocation,
             self.description,
             self.outputFormat) = rowSTR
            if isinstance(self.command, unicode):
                self.command = self.command.encode('utf-8')
            row = c.fetchone()
    finally:
        # Release even when a row cannot be converted so the lock is not
        # left held.
        sqlLock.release()

    # NOTE(review): when no row matched, the attributes read below were never
    # set and this raises AttributeError - confirm callers always pass a
    # valid commandID.
    if self.verificationCommand:
        self.verificationCommand = Command(self.verificationCommand, replacementDic)
        self.verificationCommand.command = self.verificationCommand.command.replace(
            "%outputLocation%", self.outputLocation)
    if self.eventDetailCommand:
        self.eventDetailCommand = Command(self.eventDetailCommand, replacementDic)
        self.eventDetailCommand.command = self.eventDetailCommand.command.replace(
            "%outputLocation%", self.outputLocation)
def reloadFileList(self): """Match files to their UUID's via their location and the File table's currentLocation""" self.fileList = {} #os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]]) currentPath = self.currentPath.replace("%sharedPath%", \ archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1) + "/" #print "currentPath: ", currentPath, type(currentPath) try: #print currentPath, type(currentPath) for directory, subDirectories, files in os.walk(currentPath): directory = directory.replace( currentPath, "%transferDirectory%", 1) for file in files: if "%transferDirectory%" != directory: filePath = os.path.join(directory, file) else: filePath = directory + file self.fileList[filePath] = unitFile(filePath, owningUnit=self) sql = """SELECT fileUUID, currentLocation, fileGrpUse FROM Files WHERE removedTime = 0 AND transferUUID = '""" + self.UUID + "'" #print sql c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() #print self.fileList while row != None: #print row UUID = row[0] currentPath = row[1].encode("utf-8") fileGrpUse = row[2] #print currentPath in self.fileList, row if currentPath in self.fileList: self.fileList[currentPath].UUID = UUID self.fileList[currentPath].fileGrpUse = fileGrpUse else: print >>sys.stderr, "!!!", "Transfer {" + self.UUID + "} has file {" + UUID + "}\"", currentPath, "\" in the database, but file doesn't exist in the file system.", "!!!" row = c.fetchone() sqlLock.release() except Exception as inst: traceback.print_exc(file=sys.stdout) print type(inst) print inst.args exit(1)
def __init__(self, jobChainLink, pk, unit): self.pk = pk self.jobChainLink = jobChainLink self.UUID = uuid.uuid4().__str__() self.unit = unit ###GET THE MAGIC NUMBER FROM THE TASK stuff link = 0 sql = """SELECT execute FROM TasksConfigsAssignMagicLink where pk = '%s'""" % ( pk) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row link = row[0] row = c.fetchone() sqlLock.release() ###Update the unit #set the magic number self.unit.setMagicLink(link, exitStatus="") self.jobChainLink.linkProcessingComplete(0)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Compare a file's checksum with the stored one, log the event and exit.

    The checksum stored in Files for ``fileUUID`` is compared with
    ``sha_for_file(filePath)``. A "fixity check" event with outcome "Pass" or
    "Fail" is inserted, then the process exits.

    Exit status: 0 when the checksums match, 2 when they differ, 1 when the
    database holds no checksum for the file (no event is inserted then).
    """
    sql = """SELECT checksum FROM Files WHERE fileUUID = '""" + fileUUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        checksumDB = ""
        while row is not None:
            checksumDB = row[0]
            row = c.fetchone()
    finally:
        # Release even when fetching raises so the lock is not left held.
        sqlLock.release()

    if checksumDB in (None, "", "None"):
        print >> sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        sys.exit(1)

    checksumFile = sha_for_file(filePath)
    if checksumFile != checksumDB:
        eventOutcomeDetailNote = checksumFile.__str__() + " != " + checksumDB.__str__()
        eventOutcome = "Fail"
        exitCode = 2
        print >> sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >> sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcomeDetailNote = "%s %s" % (checksumFile.__str__(), "verified")
        eventOutcome = "Pass"
        exitCode = 0

    #insertIntoEvents(fileUUID="", eventIdentifierUUID="", eventType="", eventDateTime=databaseInterface.getUTCDate(), eventDetail="", eventOutcome="", eventOutcomeDetailNote="")
    databaseFunctions.insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="fixity check",
        eventDateTime=date,
        eventOutcome=eventOutcome,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"")
    sys.exit(exitCode)
def __init__(self, jobChainLink, pk, unit): self.pk = pk self.jobChainLink = jobChainLink self.UUID = uuid.uuid4().__str__() self.unit = unit sql = """SELECT variable, variableValue, defaultMicroServiceChainLink FROM TasksConfigsUnitVariableLinkPull where pk = '%s'""" % ( pk) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row variable, variableValue, defaultMicroServiceChainLink = row row = c.fetchone() sqlLock.release() link = self.unit.getmicroServiceChainLink( variable, variableValue, defaultMicroServiceChainLink) ###Update the unit if link != None: self.jobChainLink.setExitMessage("Completed successfully") self.jobChainLink.jobChain.nextChainLink(link)
def __init__(self, jobChainLink, pk, unit): self.pk = pk self.jobChainLink = jobChainLink self.UUID = uuid.uuid4().__str__() self.unit = unit ###GET THE MAGIC NUMBER FROM THE TASK stuff link = "" sql = """SELECT variable, variableValue, microServiceChainLink FROM TasksConfigsSetUnitVariable where pk = '%s'""" % ( pk) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row variable, variableValue, microServiceChainLink = row row = c.fetchone() sqlLock.release() ###Update the unit #set the magic number self.unit.setVariable(variable, variableValue, microServiceChainLink) self.jobChainLink.linkProcessingComplete(0)
def __init__(self, jobChainLink, pk, unit): """Used to get a selection, from a list of chains, to process""" self.choices = [] self.pk = pk self.jobChainLink = jobChainLink self.UUID = uuid.uuid4().__str__() self.unit = unit self.delayTimerLock = threading.Lock() self.delayTimer = None sql = """SELECT chainAvailable, description FROM MicroServiceChainChoice JOIN MicroServiceChains on chainAvailable = MicroServiceChains.pk WHERE choiceAvailableAtLink = '%s' ORDER BY MicroServiceChainChoice.pk;""" % ( jobChainLink.pk.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: print row chainAvailable = row[0] description = row[1] self.choices.append(( chainAvailable, description, )) row = c.fetchone() sqlLock.release() preConfiguredChain = self.checkForPreconfiguredXML() if preConfiguredChain != None: time.sleep( archivematicaMCP.config.getint('MCPServer', "waitOnAutoApprove")) self.jobChainLink.setExitMessage("Completed successfully") jobChain.jobChain(self.unit, preConfiguredChain) else: choicesAvailableForUnitsLock.acquire() if self.delayTimer == None: self.jobChainLink.setExitMessage('Awaiting decision') choicesAvailableForUnits[self.jobChainLink.UUID] = self choicesAvailableForUnitsLock.release()
def watchDirectories():
    """Start watching the watched directories defined in the WatchedDirectories table in the database.

    For every configured directory (created when missing), a unit and job
    chain is first started for each entry already present (".gitignore" is
    skipped), waiting while the thread limit is reached. The directory is
    then handed to ``watchDirectory.archivematicaWatchDirectory`` with the
    configured poll interval.
    """
    rows = []
    sql = """SELECT watchedDirectoryPath, chain, onlyActOnDirectories, description FROM WatchedDirectories LEFT OUTER JOIN WatchedDirectoriesExpectedTypes ON WatchedDirectories.expectedType = WatchedDirectoriesExpectedTypes.pk"""
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        row = c.fetchone()
        while row is not None:
            rows.append(row)
            row = c.fetchone()
    finally:
        # Release even when fetching raises so the lock is not left held.
        sqlLock.release()

    for row in rows:
        directory = row[0].replace(
            "%watchDirectoryPath%",
            config.get('MCPServer', "watchDirectoryPath"), 1)
        if not os.path.isdir(directory):
            os.makedirs(directory)
        for item in os.listdir(directory):
            if item == ".gitignore":
                continue
            item = item.decode("utf-8")
            path = os.path.join(unicode(directory), item)
            #createUnitAndJobChain(path, row)
            # Wait until a task thread slot is free.
            while (limitTaskThreads <= threading.activeCount() + reservedAsTaskProcessingThreads):
                time.sleep(1)
            createUnitAndJobChainThreaded(path, row, terminate=False)
        # row[2] is onlyActOnDirectories: files are reported only when unset.
        actOnFiles = not row[2]
        watchDirectory.archivematicaWatchDirectory(
            directory,
            variablesAdded=row,
            callBackFunctionAdded=createUnitAndJobChainThreaded,
            alertOnFiles=actOnFiles,
            interval=config.getint('MCPServer', "watchDirectoriesPollInterval"))
def checkForPreconfiguredXML(self): ret = None xmlFilePath = os.path.join( \ self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1) + "/", \ archivematicaMCP.config.get('MCPServer', "processingXMLFile") \ ) if os.path.isfile(xmlFilePath): # For a list of items with pks: # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc; try: tree = etree.parse(xmlFilePath) root = tree.getroot() for preconfiguredChoice in root.find("preconfiguredChoices"): #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk: if preconfiguredChoice.find( "appliesTo").text == self.jobChainLink.description: desiredChoice = preconfiguredChoice.find( "goToChain").text sql = """SELECT MicroServiceChoiceReplacementDic.replacementDic FROM MicroServiceChoiceReplacementDic WHERE MicroServiceChoiceReplacementDic.description = '%s' AND MicroServiceChoiceReplacementDic.choiceAvailableAtLink = '%s';""" % ( desiredChoice, self.jobChainLink.pk.__str__()) c, sqlLock = databaseInterface.querySQL(sql) row = c.fetchone() while row != None: ret = row[0] row = c.fetchone() sqlLock.release() try: #<delay unitAtime="yes">30</delay> delayXML = preconfiguredChoice.find("delay") unitAtimeXML = delayXML.get("unitCtime") if unitAtimeXML != None and unitAtimeXML.lower( ) != "no": delaySeconds = int(delayXML.text) unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \ archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1)) nowTime = time.time() timeDifference = nowTime - unitTime timeToGo = delaySeconds - timeDifference print 
"time to go:", timeToGo #print "that will be: ", (nowTime + timeToGo) self.jobChainLink.setExitMessage( "Waiting till: " + datetime.datetime.fromtimestamp( (nowTime + timeToGo)).ctime()) rd = replacementDic(eval(ret)) if self.jobChainLink.passVar != None: if isinstance(self.jobChainLink.passVar, replacementDic): new = {} new.update( self.jobChainLink.passVar.dic) new.update(rd.dic) rd.dic = new t = threading.Timer( timeToGo, self.jobChainLink.linkProcessingComplete, args=[0, rd], kwargs={}) t.daemon = True t.start() t2 = threading.Timer( timeToGo, self.jobChainLink.setExitMessage, args=["Completed successfully"], kwargs={}) t2.start() return waitingOnTimer except Exception as inst: print >> sys.stderr, "Error parsing xml:" print >> sys.stderr, type(inst) print >> sys.stderr, inst.args except Exception as inst: print >> sys.stderr, "Error parsing xml:" print >> sys.stderr, type(inst) print >> sys.stderr, inst.args return ret