def __init__(self, jobChainLink, pk, unit):
        """Create, log and run one transcoder task per CommandRelationships row.

        An options dict of ``%placeholder%`` strings is resolved in three
        passes per entry: through the chain link's ``passVar`` (only when it
        is a ``replacementDic``), then through ``unit.getReplacementDic()``,
        then through ``unit.getReplacementDic(unit.currentPath)``.  A
        ``taskStandard`` is then created for every row the query returns for
        ``pk``.  When the query returns nothing the link is reported complete
        straight away.

        jobChainLink -- owning chain link; supplies ``passVar`` and receives
                        ``linkProcessingComplete``.
        pk           -- primary key looked up in CommandRelationships.
        unit         -- unit being processed; must provide ``currentPath``
                        and ``getReplacementDic``.
        """
        global outputLock
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False

        opts = {
            "inputFile": "%relativeLocation%",
            "fileUUID": "%fileUUID%",
            'commandClassifications': '%commandClassifications%',
            "taskUUID": "%taskUUID%",
            "objectsDirectory": "%SIPObjectsDirectory%",
            "logsDirectory": "%SIPLogsDirectory%",
            "sipUUID": "%SIPUUID%",
            "sipPath": "%SIPDirectory%",
            "fileGrpUse": "%fileGrpUse%",
            "normalizeFileGrpUse": "%normalizeFileGrpUse%",
            "excludeDirectory": "%excludeDirectory%",
            "standardErrorFile": "%standardErrorFile%",
            "standardOutputFile": "%standardOutputFile%",
        }

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
        # Fetched once and reused below; the original re-fetched the same
        # dictionary for every option key.
        commandReplacementDic = unit.getReplacementDic()
        passVar = self.jobChainLink.passVar
        usePassVar = isinstance(passVar, replacementDic)

        # Iterate over a snapshot of the keys because the values are
        # rewritten while looping.
        for optsKey in list(opts):
            resolved = opts[optsKey]
            if usePassVar:
                resolved = passVar.replace(resolved)[0]
            for key, value in commandReplacementDic.iteritems():
                resolved = resolved.replace(key, value)
            for key, value in SIPReplacementDic.iteritems():
                resolved = resolved.replace(key, value)
            opts[optsKey] = resolved

        sql = """SELECT CommandRelationships.pk FROM CommandRelationships JOIN Commands ON CommandRelationships.command = Commands.pk WHERE CommandRelationships.pk = '%s';""" % (str(pk))
        rows = databaseInterface.queryAllSQL(sql)
        if not rows:
            self.jobChainLink.linkProcessingComplete(self.exitCode)
            return

        tasksList = []
        # ``with`` guarantees the lock is released even if task creation or
        # logging raises.
        with self.tasksLock:
            for row in rows:
                UUID = str(uuid.uuid4())
                # Each task gets its own copy so a later row cannot overwrite
                # the taskUUID an earlier task was created with.
                taskOpts = dict(opts)
                taskOpts["taskUUID"] = UUID
                taskOpts["CommandRelationship"] = str(pk)
                # The original called deUnicode() and dropped the result.
                execute = deUnicode("transcoder_cr%s" % (pk))
                arguments = str(row)
                standardOutputFile = taskOpts["standardOutputFile"]
                standardErrorFile = taskOpts["standardErrorFile"]
                self.standardOutputFile = standardOutputFile
                self.standardErrorFile = standardErrorFile
                self.execute = execute
                self.arguments = arguments
                task = taskStandard(self, execute, taskOpts, standardOutputFile, standardErrorFile, outputLock=outputLock, UUID=UUID)
                self.tasks[UUID] = task
                databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic, UUID, arguments)
                tasksList.append(task)

        # Tasks are run only after the lock has been released.
        for task in tasksList:
            task.performTask()
Example #2
0
 def reload(self):
     """Refresh ``createdTime`` and ``currentPath`` from the SIPs table.

     Every row whose ``sipUUID`` equals ``self.UUID`` is printed and applied
     in turn, so the last row returned wins.  The lock handed back by
     ``databaseInterface.querySQL`` is released even when reading or
     converting a row raises.
     """
     sql = """SELECT * FROM SIPs WHERE sipUUID =  '""" + self.UUID + "'"
     c, sqlLock = databaseInterface.querySQL(sql)
     try:
         row = c.fetchone()
         while row is not None:
             print(row)
             # Column 0 is not read; self.UUID is left unchanged.
             self.createdTime = deUnicode(row[1])
             self.currentPath = deUnicode(row[2])
             row = c.fetchone()
     finally:
         sqlLock.release()
 def reload(self):
     """Refresh ``createdTime`` and ``currentPath`` from the SIPs table.

     Every row whose ``sipUUID`` equals ``self.UUID`` is printed and applied
     in turn, so the last row returned wins.  The lock handed back by
     ``databaseInterface.querySQL`` is released even when reading or
     converting a row raises.
     """
     sql = """SELECT * FROM SIPs WHERE sipUUID =  '""" + self.UUID + "'"
     c, sqlLock = databaseInterface.querySQL(sql)
     try:
         row = c.fetchone()
         while row is not None:
             print(row)
             # Column 0 is not read; self.UUID is left unchanged.
             self.createdTime = deUnicode(row[1])
             self.currentPath = deUnicode(row[2])
             row = c.fetchone()
     finally:
         sqlLock.release()
Example #4
0
 def reload(self):
     """Refresh ``UUID`` and ``currentPath`` from the Transfers table.

     Every row whose ``transferUUID`` equals ``self.UUID`` is applied in
     turn, so the last row returned wins.  The lock handed back by
     ``databaseInterface.querySQL`` is released even when reading or
     converting a row raises.  Returns ``None``.
     """
     sql = """SELECT transferUUID, currentLocation FROM Transfers WHERE transferUUID =  '""" + self.UUID + "'"
     c, sqlLock = databaseInterface.querySQL(sql)
     try:
         row = c.fetchone()
         while row is not None:
             self.UUID = deUnicode(row[0])
             self.currentPath = deUnicode(row[1])
             row = c.fetchone()
     finally:
         sqlLock.release()
Example #5
0
 def reload(self):
     """Refresh ``UUID`` and ``currentPath`` from the Transfers table.

     Every row whose ``transferUUID`` equals ``self.UUID`` is applied in
     turn, so the last row returned wins.  The lock handed back by
     ``databaseInterface.querySQL`` is released even when reading or
     converting a row raises.  Returns ``None``.
     """
     sql = """SELECT transferUUID, currentLocation FROM Transfers WHERE transferUUID =  '""" + self.UUID + "'"
     c, sqlLock = databaseInterface.querySQL(sql)
     try:
         row = c.fetchone()
         while row is not None:
             self.UUID = deUnicode(row[0])
             self.currentPath = deUnicode(row[1])
             row = c.fetchone()
     finally:
         sqlLock.release()
    def __init__(self, jobChainLink, pk, unit):
        """Build a single ``taskStandard`` from a StandardTasksConfigs row and run it.

        The configuration row for ``pk`` supplies the command, its arguments
        and the stdout/stderr file templates.  Placeholders are resolved
        first through the chain link's ``passVar`` (a ``replacementDic`` or a
        list that may contain some) and then through the unit's replacement
        dictionary, with double quotes in the replacement values
        backslash-escaped.  The task is logged and started on a daemon
        thread.

        jobChainLink -- owning chain link; supplies ``passVar``.
        pk           -- primary key of the StandardTasksConfigs row.
        unit         -- unit being processed; must provide ``currentPath``
                        and ``getReplacementDic``.

        NOTE(review): if the query returns no row, the configuration locals
        are never bound and the code below raises NameError, exactly as the
        original did.  Confirm whether a missing row can happen.
        """
        self.tasks = []
        self.pk = pk
        self.jobChainLink = jobChainLink
        sql = """SELECT * FROM StandardTasksConfigs where pk = """ + pk.__str__()
        c, sqlLock = databaseInterface.querySQL(sql)
        try:
            row = c.fetchone()
            while row is not None:
                print(row)
                # Columns 1 and 2 (the file-name filters used by sibling task
                # managers) are not needed here.
                filterSubDir = deUnicode(row[3])
                self.requiresOutputLock = deUnicode(row[4])
                standardOutputFile = deUnicode(row[5])
                standardErrorFile = deUnicode(row[6])
                execute = deUnicode(row[7])
                self.execute = execute
                arguments = deUnicode(row[8])
                row = c.fetchone()
        finally:
            # Release the lock even when reading or converting a row fails.
            sqlLock.release()

        if filterSubDir:
            directory = os.path.join(unit.currentPath, filterSubDir)
        else:
            directory = unit.currentPath

        linkPassVar = self.jobChainLink.passVar
        if isinstance(linkPassVar, list):
            for passVar in linkPassVar:
                if isinstance(passVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = passVar.replace(execute, arguments, standardOutputFile, standardErrorFile)
        elif isinstance(linkPassVar, replacementDic):
            execute, arguments, standardOutputFile, standardErrorFile = linkPassVar.replace(execute, arguments, standardOutputFile, standardErrorFile)

        commandReplacementDic = unit.getReplacementDic(directory)
        # Replace every occurrence of each key in the command strings.
        for key, rawValue in commandReplacementDic.iteritems():
            value = rawValue.replace("\"", ("\\\""))
            if execute:
                execute = execute.replace(key, value)
            if arguments:
                arguments = arguments.replace(key, value)
            if standardOutputFile:
                standardOutputFile = standardOutputFile.replace(key, value)
            if standardErrorFile:
                standardErrorFile = standardErrorFile.replace(key, value)

        UUID = str(uuid.uuid4())
        self.task = taskStandard(self, execute, arguments, standardOutputFile, standardErrorFile, UUID=UUID)
        databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic, UUID, arguments)
        t = threading.Thread(target=self.task.performTask)
        t.daemon = True
        t.start()
def call(jobs):
    """Turn every container directory of a transfer's objects directory into its own SIP.

    For each job, inside one database transaction:

    * each sub-directory of the objects directory becomes a SIP named
      ``<transferName>-<container>`` that is first built under the
      processing directory;
    * the container's contents are moved into the new SIP's ``objects``
      directory;
    * File rows of the transfer located under that container are re-pointed
      at the SIP when the file is actually present there;
    * the finished SIP is moved to the auto-process directory.

    Plain files found directly in the objects directory are reported on
    stderr and skipped.

    jobs -- iterable of job objects exposing ``args``, ``JobContext()`` and
            ``pyprint()``.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                objectsDirectory = job.args[1]
                transferName = job.args[2]
                transferUUID = job.args[3]
                processingDirectory = job.args[4]
                autoProcessSIPDirectory = job.args[5]
                # Was ``job.argv[6]``; every other argument is read from
                # ``job.args``, so the odd attribute name looks like a typo.
                sharedPath = job.args[6]
                transfer_objects_directory = '%transferDirectory%objects'

                for container in os.listdir(objectsDirectory):
                    containerPath = os.path.join(objectsDirectory, container)
                    if not os.path.isdir(containerPath):
                        job.pyprint("file (not container) found: ", container, file=sys.stderr)
                        continue

                    # Generated only for real containers.
                    sipUUID = str(uuid.uuid4())
                    sipName = "%s-%s" % (transferName, container)

                    tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
                    destSIPDir = os.path.join(autoProcessSIPDirectory, sipName) + "/"
                    archivematicaFunctions.create_structured_directory(tmpSIPDir, manual_normalization=True)
                    databaseFunctions.createSIP(destSIPDir.replace(sharedPath, '%sharedPath%'), sipUUID, printfn=job.pyprint)

                    # Move the objects to the SIP directory.
                    for item in os.listdir(containerPath):
                        shutil.move(os.path.join(containerPath, item), os.path.join(tmpSIPDir, "objects", item))

                    # Get the database list of files in the objects directory.
                    # For each file, confirm it is in the SIP objects
                    # directory, then update its current location and
                    # owning SIP.
                    directory = os.path.join(transfer_objects_directory, container)
                    files = File.objects.filter(removedtime__isnull=True,
                                                currentlocation__startswith=directory,
                                                transfer_id=transferUUID)
                    for f in files:
                        currentPath = databaseFunctions.deUnicode(f.currentlocation).replace(directory, transfer_objects_directory)
                        currentSIPFilePath = currentPath.replace("%transferDirectory%", tmpSIPDir)
                        if os.path.isfile(currentSIPFilePath):
                            f.currentlocation = currentPath.replace("%transferDirectory%", "%SIPDirectory%")
                            f.sip_id = sipUUID
                            f.save()
                        else:
                            job.pyprint("file not found: ", currentSIPFilePath, file=sys.stderr)

                    # Move the completed SIP to autoProcessSIPDirectory.
                    shutil.move(tmpSIPDir, destSIPDir)
    def __init__(self, jobChainLink, pk, unit):
        """Start one ``taskStandard`` thread per file of ``unit`` that passes the configured filters.

        The StandardTasksConfigs row for ``pk`` supplies the file filters
        (name suffix, basename prefix, sub-directory), the command, its
        arguments and the stdout/stderr templates.  For every matching file
        the templates are resolved through the chain link's ``passVar``, then
        the file's replacement dictionary, then the unit's; the task is
        logged and started on a daemon thread.  Thread creation is throttled
        by ``archivematicaMCP.limitTaskThreads``.  When no task was created
        the link is reported complete immediately.

        jobChainLink -- owning chain link; supplies ``passVar`` and receives
                        ``linkProcessingComplete``.
        pk           -- primary key of the StandardTasksConfigs row.
        unit         -- unit being processed; must provide ``fileList``,
                        ``pathString``, ``currentPath`` and
                        ``getReplacementDic``.

        NOTE(review): if the query returns no row, the filter locals are
        never bound and the code below raises NameError, exactly as the
        original did.  Confirm whether a missing row can happen.
        """
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False
        sql = """SELECT * FROM StandardTasksConfigs where pk = '%s'""" % (pk.__str__())
        c, sqlLock = databaseInterface.querySQL(sql)
        try:
            row = c.fetchone()
            while row is not None:
                filterFileEnd = deUnicode(row[1])
                filterFileStart = deUnicode(row[2])
                filterSubDir = deUnicode(row[3])
                requiresOutputLock = row[4]
                self.standardOutputFile = deUnicode(row[5])
                self.standardErrorFile = deUnicode(row[6])
                self.execute = deUnicode(row[7])
                self.arguments = deUnicode(row[8])
                row = c.fetchone()
        finally:
            # Release the lock even when reading or converting a row fails.
            sqlLock.release()
        outputLock = threading.Lock() if requiresOutputLock else None

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
        linkPassVar = self.jobChainLink.passVar

        self.tasksLock.acquire()
        try:
            for filePath, fileUnit in unit.fileList.items():
                if filterFileEnd and not filePath.endswith(filterFileEnd):
                    continue
                if filterFileStart and not os.path.basename(filePath).startswith(filterFileStart):
                    continue
                if filterSubDir and not filePath.startswith(unit.pathString + filterSubDir):
                    continue

                standardOutputFile = self.standardOutputFile
                standardErrorFile = self.standardErrorFile
                execute = self.execute
                arguments = self.arguments

                if isinstance(linkPassVar, list):
                    for passVar in linkPassVar:
                        if isinstance(passVar, replacementDic):
                            execute, arguments, standardOutputFile, standardErrorFile = passVar.replace(execute, arguments, standardOutputFile, standardErrorFile)
                elif isinstance(linkPassVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = linkPassVar.replace(execute, arguments, standardOutputFile, standardErrorFile)

                commandReplacementDic = fileUnit.getReplacementDic()
                # File-level replacements are applied before SIP-level ones.
                for dic in (commandReplacementDic, SIPReplacementDic):
                    for key, rawValue in dic.iteritems():
                        value = rawValue.replace("\"", ("\\\""))
                        if isinstance(value, unicode):
                            value = value.encode("utf-8")
                        if execute:
                            execute = execute.replace(key, value)
                        if arguments:
                            arguments = arguments.replace(key, value)
                        if standardOutputFile:
                            standardOutputFile = standardOutputFile.replace(key, value)
                        if standardErrorFile:
                            standardErrorFile = standardErrorFile.replace(key, value)

                UUID = str(uuid.uuid4())
                task = taskStandard(self, execute, arguments, standardOutputFile, standardErrorFile, outputLock=outputLock, UUID=UUID)
                self.tasks[UUID] = task
                databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic, UUID, arguments)
                t = threading.Thread(target=task.performTask)
                t.daemon = True
                while archivematicaMCP.limitTaskThreads <= threading.activeCount():
                    # Drop the lock while waiting for a free thread slot and
                    # always take it back, so the outer ``finally`` releases
                    # a lock that is really held.
                    self.tasksLock.release()
                    try:
                        time.sleep(archivematicaMCP.limitTaskThreadsSleep)
                    finally:
                        self.tasksLock.acquire()
                print("Active threads: %s" % threading.activeCount())
                t.start()

            self.clearToNextLink = True
        finally:
            self.tasksLock.release()
        if not self.tasks:
            self.jobChainLink.linkProcessingComplete(self.exitCode)
    def __init__(self, jobChainLink, pk, unit):
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False
        sql = """SELECT * FROM StandardTasksConfigs where pk = '%s'""" % (pk.__str__())
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            filterFileEnd = deUnicode(row[1])
            filterFileStart = deUnicode(row[2])
            filterSubDir = deUnicode(row[3])
            requiresOutputLock = row[4]
            self.standardOutputFile = deUnicode(row[5])
            self.standardErrorFile = deUnicode(row[6])
            self.execute = deUnicode(row[7])
            self.arguments = deUnicode(row[8])
            row = c.fetchone()
        sqlLock.release()
        if requiresOutputLock:
            outputLock = threading.Lock()
        else:
            outputLock = None

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
        
        SIPUUID = unit.owningUnit.UUID
        sql = """SELECT variableValue FROM UnitVariables WHERE unitType = 'SIP' AND variable = 'normalizationFileIdentificationToolIdentifierTypes' AND unitUUID = '%s';""" % (SIPUUID)
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows):
            fileIdentificationRestriction = rows[0][0]
        else:
            fileIdentificationRestriction = None
        
        self.tasksLock.acquire()
        for file, fileUnit in unit.fileList.items():
            #print "file:", file, fileUnit
            if filterFileEnd:
                if not file.endswith(filterFileEnd):
                    continue
            if filterFileStart:
                if not os.path.basename(file).startswith(filterFileStart):
                    continue
            if filterSubDir:
                #print "file", file, type(file)
                #print unit.pathString, type(unit.pathString)
                #filterSubDir = filterSubDir.encode('utf-8')
                #print filterSubDir, type(filterSubDir)

                if not file.startswith(unit.pathString + filterSubDir):
                    print "skipping file", file, filterSubDir, " :   \t Doesn't start with: ", unit.pathString + filterSubDir
                    continue

            standardOutputFile = self.standardOutputFile
            standardErrorFile = self.standardErrorFile
            execute = self.execute
            arguments = self.arguments
            
            if self.jobChainLink.passVar != None:
                if isinstance(self.jobChainLink.passVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(execute, arguments, standardOutputFile, standardErrorFile)

            fileUUID = unit.UUID
            ComandClassification = self.execute
            #passVar=self.jobChainLink.passVar
            toPassVar = eval(arguments)
            toPassVar.update({"%standardErrorFile%":standardErrorFile, "%standardOutputFile%":standardOutputFile, '%commandClassifications%':ComandClassification})
            #print "debug", toPassVar, toPassVar['%normalizeFileGrpUse%'], unit.fileGrpUse
            passVar=replacementDic(toPassVar)
            if toPassVar['%normalizeFileGrpUse%'] != unit.fileGrpUse or self.alreadyNormalizedManually(unit, ComandClassification):
                #print "debug: ", unit.currentPath, unit.fileGrpUse
                self.jobChainLink.linkProcessingComplete(self.exitCode, passVar=self.jobChainLink.passVar)
            else:
                taskType = databaseInterface.queryAllSQL("SELECT pk FROM TaskTypes WHERE description = '%s';" % ("Transcoder task type"))[0][0]
                
                #find out if fileIDType superseded
                sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, CommandRelationships.command FROM FilesIdentifiedIDs JOIN FileIDs ON FilesIdentifiedIDs.fileID = FileIDs.pk JOIN FileIDTypes ON FileIDs.fileIDType = FileIDTypes.pk JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND (%s) AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE AND FileIDTypes.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % (taskType, fileUUID, ComandClassification, "FileIDTypes.pk = 'ceff7e8c-3e78-4d51-9654-eb72efd28777'")
                rows = databaseInterface.queryAllSQL(sql)
                #-not superseded check
                if not len(rows):
                    if fileIdentificationRestriction:
                        sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, CommandRelationships.command FROM FilesIdentifiedIDs JOIN FileIDs ON FilesIdentifiedIDs.fileID = FileIDs.pk JOIN FileIDTypes ON FileIDs.fileIDType = FileIDTypes.pk JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND (%s) AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE AND FileIDTypes.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % (taskType, fileUUID, ComandClassification, fileIdentificationRestriction)
                    else:
                        sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, CommandRelationships.command FROM FilesIdentifiedIDs JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % (taskType, fileUUID, ComandClassification)
                    rows = databaseInterface.queryAllSQL(sql)
                
                commandsRun={}
                #if the rule is defined
                if rows and len(rows):
                    for row in rows:
                        microServiceChainLink, commandRelationship, command = row
                        if command in commandsRun:
                            link = commandsRun[command]
                            sql = """SELECT exitCode FROM Tasks JOIN Jobs ON Jobs.jobUUID = Tasks.jobUUID WHERE Tasks.jobUUID IN (SELECT jobUUID FROM Jobs WHERE subJobOf = '%s') AND Jobs.MicroServiceChainLinksPK = '%s';""" % (self.jobChainLink.UUID, link)
                            rows = databaseInterface.queryAllSQL(sql)
                            if len(rows) != 1:
                                print sys.stderr, "Bad query:", sql
                            for row in rows:
                                ret = row[0]
                                sql = "UPDATE CommandRelationships SET countAttempts=countAttempts+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                                if ret:
                                    column = "countNotOK"
                                else:
                                    column = "countOK"
                                sql = "UPDATE CommandRelationships SET " + column + "=" + column + "+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                        else:
                            commandsRun[command] = microServiceChainLink
                            jobChainLink.jobChain.nextChainLink(row[0], passVar=passVar, incrementLinkSplit=True, subJobOf=self.jobChainLink.UUID)
                #no rule defined, use default
                else:
                    sql = """SELECT MicroserviceChainLink, CommandRelationships.pk, CommandRelationships.command FROM DefaultCommandsForClassifications JOIN MicroServiceChainLinks ON MicroServiceChainLinks.pk = DefaultCommandsForClassifications.MicroserviceChainLink  JOIN TasksConfigs ON TasksConfigs.pk = MicroServiceChainLinks.currentTask  JOIN CommandRelationships ON CommandRelationships.pk = TasksConfigs.taskTypePKReference JOIN CommandClassifications ON CommandClassifications.pk = DefaultCommandsForClassifications.forClassification WHERE TasksConfigs.taskType = '5e70152a-9c5b-4c17-b823-c9298c546eeb' AND CommandClassifications.classification = '%s' AND DefaultCommandsForClassifications.enabled = TRUE;""" % (ComandClassification)
                    rows = databaseInterface.queryAllSQL(sql)
                    
                    for row in rows:
                        microServiceChainLink, commandRelationship, command = row
                        
                        if command in commandsRun:
                            link = commandsRun[command]
                            sql = """SELECT exitCode FROM Tasks JOIN Jobs ON Jobs.jobUUID = Tasks.jobUUID WHERE Tasks.jobUUID IN (SELECT jobUUID FROM Jobs WHERE subJobOf = '%s') AND Jobs.MicroServiceChainLinksPK = '%s';""" % (self.jobChainLink.UUID, link)
                            rows = databaseInterface.queryAllSQL(sql)
                            if len(rows) != 1:
                                print sys.stderr, "Bad query:", sql
                            for row in rows:
                                ret = row[0]
                                sql = "UPDATE CommandRelationships SET countAttempts=countAttempts+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                                if ret:
                                    column = "countNotOK"
                                else:
                                    column = "countOK"
                                sql = "UPDATE CommandRelationships SET " + column + "=" + column + "+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                        else:
                            jobChainLink.jobChain.nextChainLink(microServiceChainLink, passVar=passVar, incrementLinkSplit=True, subJobOf=self.jobChainLink.UUID)
                            commandsRun[command] = microServiceChainLink 
                self.jobChainLink.linkProcessingComplete(self.exitCode, passVar=self.jobChainLink.passVar)
Example #10
0
def process_transfer(request, transfer_uuid):
    """Build a new SIP from the objects of an existing transfer.

    For a logged-in user this creates a structured SIP directory inside the
    processing directory, moves the transfer's objects into it, re-points the
    matching ``Files`` rows at the new SIP, copies ``processingMCP.xml`` and
    finally moves the SIP into ``watchedDirectories/SIPCreation/SIPsUnderConstruction``.

    :param request: incoming request; only ``request.user.id`` is read.
    :param transfer_uuid: UUID of the transfer to convert.
    :returns: an ``HttpResponse`` with a JSON body holding ``message`` and,
        when the user is not logged in, ``error``.
    """
    response = {}

    if request.user.id:
        # get transfer info
        transfer = models.Transfer.objects.get(uuid=transfer_uuid)
        sharedPath = helpers.get_server_config_value('sharedDirectory')
        transfer_path = transfer.currentlocation.replace('%sharedPath%', sharedPath)

        import MySQLdb
        import databaseInterface
        import databaseFunctions
        import shutil

        from archivematicaCreateStructuredDirectory import createStructuredDirectory
        createStructuredDirectory(transfer_path, createManualNormalizedDirectories=False)

        processingDirectory = helpers.get_server_config_value('processingDirectory')
        # [:-1] drops the final character of the path (presumably a trailing
        # slash) and [:-37] the last 37 characters of the directory name
        # (presumably a '-<UUID>' suffix) -- TODO confirm against callers.
        transfer_directory_name = os.path.basename(transfer_path[:-1])
        transfer_name = transfer_directory_name[:-37]

        tmpSIPDir = os.path.join(processingDirectory, transfer_name) + "/"
        processSIPDirectory = os.path.join(sharedPath, 'watchedDirectories/SIPCreation/SIPsUnderConstruction') + '/'
        createStructuredDirectory(tmpSIPDir, createManualNormalizedDirectories=False)
        objectsDirectory = os.path.join(transfer_path, 'objects') + '/'

        # A new SIP row is always created; no lookup for an existing row with
        # the same path is performed.
        sipUUID = uuid.uuid4().__str__()
        destSIPDir = os.path.join(processSIPDirectory, transfer_name) + "/"
        lookup_path = destSIPDir.replace(sharedPath, '%sharedPath%')
        databaseFunctions.createSIP(lookup_path, sipUUID)

        #move the objects to the SIPDir
        for item in os.listdir(objectsDirectory):
            shutil.move(os.path.join(objectsDirectory, item), os.path.join(tmpSIPDir, "objects", item))

        #get the database list of files in the objects directory
        #for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
        # transfer_uuid comes from the request, so it is escaped like the path
        # in the UPDATE below. '\\%' keeps the literal backslash that makes
        # MySQL's LIKE treat '%' as a plain character.
        sql = """SELECT  fileUUID, currentLocation FROM Files WHERE removedTime = 0 AND currentLocation LIKE '\\%transferDirectory\\%objects%' AND transferUUID =  '""" + MySQLdb.escape_string(transfer_uuid) + "'"
        for row in databaseInterface.queryAllSQL(sql):
            fileUUID = row[0]
            currentPath = databaseFunctions.deUnicode(row[1])
            currentSIPFilePath = currentPath.replace("%transferDirectory%", tmpSIPDir)
            if os.path.isfile(currentSIPFilePath):
                newLocation = currentPath.replace("%transferDirectory%", "%SIPDirectory%")
                sql = """UPDATE Files SET currentLocation='%s', sipUUID='%s' WHERE fileUUID='%s'""" % (MySQLdb.escape_string(newLocation), sipUUID, fileUUID)
                databaseInterface.runSQL(sql)
            else:
                print >>sys.stderr, "file not found: ", currentSIPFilePath

        #copy processingMCP.xml file
        src = os.path.join(os.path.dirname(objectsDirectory[:-1]), "processingMCP.xml")
        dst = os.path.join(tmpSIPDir, "processingMCP.xml")
        shutil.copy(src, dst)

        #moveSIPTo processSIPDirectory
        shutil.move(tmpSIPDir, destSIPDir)

        elasticSearchFunctions.connect_and_change_transfer_file_status(transfer_uuid, '')

        response['message'] = 'SIP ' + sipUUID + ' created.'
    else:
        response['error']   = True
        response['message'] = 'Must be logged in.'

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json'
    )
Example #11
0
    def __init__(self, jobChainLink, pk, unit):
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False
        sql = """SELECT * FROM StandardTasksConfigs where pk = '%s'""" % (
            pk.__str__())
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            filterFileEnd = deUnicode(row[1])
            filterFileStart = deUnicode(row[2])
            filterSubDir = deUnicode(row[3])
            requiresOutputLock = row[4]
            self.standardOutputFile = deUnicode(row[5])
            self.standardErrorFile = deUnicode(row[6])
            self.execute = deUnicode(row[7])
            self.arguments = deUnicode(row[8])
            row = c.fetchone()
        sqlLock.release()
        if requiresOutputLock:
            outputLock = threading.Lock()
        else:
            outputLock = None

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
        self.tasksLock.acquire()
        for file, fileUnit in unit.fileList.items():
            #print "file:", file, fileUnit
            if filterFileEnd:
                if not file.endswith(filterFileEnd):
                    continue
            if filterFileStart:
                if not os.path.basename(file).startswith(filterFileStart):
                    continue
            if filterSubDir:
                #print "file", file, type(file)
                #print unit.pathString, type(unit.pathString)
                #filterSubDir = filterSubDir.encode('utf-8')
                #print filterSubDir, type(filterSubDir)

                if not file.startswith(unit.pathString + filterSubDir):
                    continue

            standardOutputFile = self.standardOutputFile
            standardErrorFile = self.standardErrorFile
            execute = self.execute
            arguments = self.arguments

            if self.jobChainLink.passVar != None:
                if isinstance(self.jobChainLink.passVar, list):
                    for passVar in self.jobChainLink.passVar:
                        if isinstance(passVar, replacementDic):
                            execute, arguments, standardOutputFile, standardErrorFile = passVar.replace(
                                execute, arguments, standardOutputFile,
                                standardErrorFile)
                elif isinstance(self.jobChainLink.passVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(
                        execute, arguments, standardOutputFile,
                        standardErrorFile)

            commandReplacementDic = fileUnit.getReplacementDic()
            for key in commandReplacementDic.iterkeys():
                value = commandReplacementDic[key].replace("\"", ("\\\""))
                #print "key", type(key), key
                #print "value", type(value), value
                if isinstance(value, unicode):
                    value = value.encode("utf-8")
                #key = key.encode("utf-8")
                #value = value.encode("utf-8")
                if execute:
                    execute = execute.replace(key, value)
                if arguments:
                    arguments = arguments.replace(key, value)
                if standardOutputFile:
                    standardOutputFile = standardOutputFile.replace(key, value)
                if standardErrorFile:
                    standardErrorFile = standardErrorFile.replace(key, value)

            for key in SIPReplacementDic.iterkeys():
                value = SIPReplacementDic[key].replace("\"", ("\\\""))
                #print "key", type(key), key
                #print "value", type(value), value
                if isinstance(value, unicode):
                    value = value.encode("utf-8")
                #key = key.encode("utf-8")
                #value = value.encode("utf-8")

                if execute:
                    execute = execute.replace(key, value)
                if arguments:
                    arguments = arguments.replace(key, value)
                if standardOutputFile:
                    standardOutputFile = standardOutputFile.replace(key, value)
                if standardErrorFile:
                    standardErrorFile = standardErrorFile.replace(key, value)

            UUID = uuid.uuid4().__str__()
            task = taskStandard(self,
                                execute,
                                arguments,
                                standardOutputFile,
                                standardErrorFile,
                                outputLock=outputLock,
                                UUID=UUID)
            self.tasks[UUID] = task
            databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic,
                                                UUID, arguments)
            t = threading.Thread(target=task.performTask)
            t.daemon = True
            while (archivematicaMCP.limitTaskThreads <=
                   threading.activeCount()):
                #print "Waiting for active threads", threading.activeCount()
                self.tasksLock.release()
                time.sleep(archivematicaMCP.limitTaskThreadsSleep)
                self.tasksLock.acquire()
            print "Active threads:", threading.activeCount()
            t.start()

        self.clearToNextLink = True
        self.tasksLock.release()
        if self.tasks == {}:
            self.jobChainLink.linkProcessingComplete(self.exitCode)
Example #12
0
    def __init__(self, jobChainLink, pk, unit):
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False
        sql = """SELECT filterSubDir, execute FROM TasksConfigsStartLinkForEachFile where pk = '%s'""" % (
            pk.__str__())
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()

        while row != None:
            filterFileEnd = ""  #deUnicode(row[1])
            filterFileStart = ""  #deUnicode(row[2])
            filterSubDir = deUnicode(row[0])
            requiresOutputLock = ""  #row[4]
            self.standardOutputFile = ""  #deUnicode(row[5])
            self.standardErrorFile = ""  #deUnicode(row[6])
            self.execute = deUnicode(row[1])
            self.arguments = ""  #deUnicode(row[8])
            row = c.fetchone()
        sqlLock.release()
        if requiresOutputLock:
            outputLock = threading.Lock()
        else:
            outputLock = None

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)

        self.tasksLock.acquire()
        for file, fileUnit in unit.fileList.items():
            #print "file:", file, fileUnit
            if filterFileEnd:
                if not file.endswith(filterFileEnd):
                    continue
            if filterFileStart:
                if not os.path.basename(file).startswith(filterFileStart):
                    continue
            if filterSubDir:
                #print "file", file, type(file)
                #print unit.pathString, type(unit.pathString)
                #filterSubDir = filterSubDir.encode('utf-8')
                #print filterSubDir, type(filterSubDir)

                if not file.startswith(unit.pathString + filterSubDir):
                    print "skipping file", file, filterSubDir, " :   \t Doesn't start with: ", unit.pathString + filterSubDir
                    continue

            standardOutputFile = self.standardOutputFile
            standardErrorFile = self.standardErrorFile
            execute = self.execute
            arguments = self.arguments

            if self.jobChainLink.passVar != None:
                if isinstance(self.jobChainLink.passVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(
                        execute, arguments, standardOutputFile,
                        standardErrorFile)

            commandReplacementDic = fileUnit.getReplacementDic()
            for key in commandReplacementDic.iterkeys():
                value = commandReplacementDic[key].replace("\"", ("\\\""))
                #print "key", type(key), key
                #print "value", type(value), value
                if isinstance(value, unicode):
                    value = value.encode("utf-8")
                #key = key.encode("utf-8")
                #value = value.encode("utf-8")
                if execute:
                    execute = execute.replace(key, value)
                if arguments:
                    arguments = arguments.replace(key, value)
                if standardOutputFile:
                    standardOutputFile = standardOutputFile.replace(key, value)
                if standardErrorFile:
                    standardErrorFile = standardErrorFile.replace(key, value)
            for key in SIPReplacementDic.iterkeys():
                value = SIPReplacementDic[key].replace("\"", ("\\\""))
                #print "key", type(key), key
                #print "value", type(value), value
                if isinstance(value, unicode):
                    value = value.encode("utf-8")
                #key = key.encode("utf-8")
                #value = value.encode("utf-8")

                if execute:
                    execute = execute.replace(key, value)
                if arguments:
                    arguments = arguments.replace(key, value)
                if standardOutputFile:
                    standardOutputFile = standardOutputFile.replace(key, value)
                if standardErrorFile:
                    standardErrorFile = standardErrorFile.replace(key, value)
            UUID = uuid.uuid4().__str__()
            self.tasks[UUID] = None
            ## passVar = [{preservationJobUUID, accessJobUUID, thumbnailsJobUUID}] #an idea not in use
            t = threading.Thread(target=jobChain.jobChain,
                                 args=(
                                     fileUnit,
                                     execute,
                                     self.taskCompletedCallBackFunction,
                                 ),
                                 kwargs={
                                     "passVar": self.jobChainLink.passVar,
                                     "UUID": UUID,
                                     "subJobOf":
                                     self.jobChainLink.UUID.__str__()
                                 })
            t.daemon = True
            while (archivematicaMCP.limitTaskThreads / 2 <=
                   threading.activeCount()):
                #print "Waiting for active threads", threading.activeCount()
                self.tasksLock.release()
                time.sleep(archivematicaMCP.limitTaskThreadsSleep)
                self.tasksLock.acquire()
            print "Active threads:", threading.activeCount()
            t.start()
        self.clearToNextLink = True
        self.tasksLock.release()
        if self.tasks == {}:
            self.jobChainLink.linkProcessingComplete(self.exitCode)
    def __init__(self, jobChainLink, pk, unit):
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False
        sql = """SELECT * FROM StandardTasksConfigs where pk = """ + pk.__str__()
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            filterFileEnd = deUnicode(row[1])
            filterFileStart = deUnicode(row[2])
            filterSubDir = deUnicode(row[3])
            requiresOutputLock = row[4]
            self.standardOutputFile = deUnicode(row[5])
            self.standardErrorFile = deUnicode(row[6])
            self.execute = deUnicode(row[7])
            self.arguments = deUnicode(row[8])
            row = c.fetchone()
        sqlLock.release()
        if requiresOutputLock:
            outputLock = threading.Lock()
        else:
            outputLock = None

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)

        self.tasksLock.acquire()
        print "Debug - ", unit.fileList.items()
        for file, fileUnit in unit.fileList.items():
            # print "file:", file, fileUnit
            if filterFileEnd:
                if not file.endswith(filterFileEnd):
                    continue
            if filterFileStart:
                if not os.path.basename(file).startswith(filterFileStart):
                    continue
            if filterSubDir:
                # print "file", file, type(file)
                # print unit.pathString, type(unit.pathString)
                # filterSubDir = filterSubDir.encode('utf-8')
                # print filterSubDir, type(filterSubDir)

                if not file.startswith(unit.pathString + filterSubDir):
                    print "skipping file", file, filterSubDir
                    continue

            standardOutputFile = self.standardOutputFile
            standardErrorFile = self.standardErrorFile
            execute = self.execute
            arguments = self.arguments

            if self.jobChainLink.passVar != None:
                if isinstance(self.jobChainLink.passVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(
                        execute, arguments, standardOutputFile, standardErrorFile
                    )

            commandReplacementDic = fileUnit.getReplacementDic()
            for key in commandReplacementDic.iterkeys():
                value = commandReplacementDic[key].replace('"', ('\\"'))
                # print "key", type(key), key
                # print "value", type(value), value
                if isinstance(value, unicode):
                    value = value.encode("utf-8")
                # key = key.encode("utf-8")
                # value = value.encode("utf-8")
                if execute:
                    execute = execute.replace(key, value)
                if arguments:
                    arguments = arguments.replace(key, value)
                if standardOutputFile:
                    standardOutputFile = standardOutputFile.replace(key, value)
                if standardErrorFile:
                    standardErrorFile = standardErrorFile.replace(key, value)
            for key in SIPReplacementDic.iterkeys():
                value = SIPReplacementDic[key].replace('"', ('\\"'))
                # print "key", type(key), key
                # print "value", type(value), value
                if isinstance(value, unicode):
                    value = value.encode("utf-8")
                # key = key.encode("utf-8")
                # value = value.encode("utf-8")

                if execute:
                    execute = execute.replace(key, value)
                if arguments:
                    arguments = arguments.replace(key, value)
                if standardOutputFile:
                    standardOutputFile = standardOutputFile.replace(key, value)
                if standardErrorFile:
                    standardErrorFile = standardErrorFile.replace(key, value)
            UUID = uuid.uuid4().__str__()
            self.tasks[UUID] = None
            ## passVar = [{preservationJobUUID, accessJobUUID, thumbnailsJobUUID}] #an idea not in use
            t = threading.Thread(
                target=jobChain.jobChain,
                args=(fileUnit, execute, self.taskCompletedCallBackFunction),
                kwargs={
                    "passVar": self.jobChainLink.passVar,
                    "UUID": UUID,
                    "subJobOf": self.jobChainLink.UUID.__str__(),
                },
            )
            t.daemon = True
            while archivematicaMCP.limitTaskThreads / 2 <= threading.activeCount():
                # print "Waiting for active threads", threading.activeCount()
                self.tasksLock.release()
                time.sleep(archivematicaMCP.limitTaskThreadsSleep)
                self.tasksLock.acquire()
            print "Active threads:", threading.activeCount()
            t.start()
        self.clearToNextLink = True
        self.tasksLock.release()
        if self.tasks == {}:
            self.jobChainLink.linkProcessingComplete(self.exitCode)
Example #14
0
def process_transfer(request, transfer_uuid):
    """Build a new SIP from the objects of an existing transfer.

    For a logged-in user this creates a structured SIP directory inside the
    processing directory, moves the transfer's objects into it, re-points the
    matching ``Files`` rows at the new SIP, copies ``processingMCP.xml`` and
    finally moves the SIP into
    ``watchedDirectories/SIPCreation/SIPsUnderConstruction``.

    :param request: incoming request; only ``request.user.id`` is read.
    :param transfer_uuid: UUID of the transfer to convert.
    :returns: an ``HttpResponse`` with a JSON body holding ``message`` and,
        when the user is not logged in, ``error``.
    """
    response = {}

    if request.user.id:
        # get transfer info
        transfer = models.Transfer.objects.get(uuid=transfer_uuid)
        sharedPath = helpers.get_server_config_value('sharedDirectory')
        transfer_path = transfer.currentlocation.replace(
            '%sharedPath%', sharedPath)

        import MySQLdb
        import databaseInterface
        import databaseFunctions
        import shutil

        from archivematicaCreateStructuredDirectory import createStructuredDirectory
        createStructuredDirectory(transfer_path,
                                  createManualNormalizedDirectories=False)

        processingDirectory = helpers.get_server_config_value(
            'processingDirectory')
        # [:-1] drops the final character of the path (presumably a trailing
        # slash) and [:-37] the last 37 characters of the directory name
        # (presumably a '-<UUID>' suffix) -- TODO confirm against callers.
        transfer_directory_name = os.path.basename(transfer_path[:-1])
        transfer_name = transfer_directory_name[:-37]

        tmpSIPDir = os.path.join(processingDirectory, transfer_name) + "/"
        processSIPDirectory = os.path.join(
            sharedPath,
            'watchedDirectories/SIPCreation/SIPsUnderConstruction') + '/'
        createStructuredDirectory(tmpSIPDir,
                                  createManualNormalizedDirectories=False)
        objectsDirectory = os.path.join(transfer_path, 'objects') + '/'

        # A new SIP row is always created; no lookup for an existing row with
        # the same path is performed.
        sipUUID = uuid.uuid4().__str__()
        destSIPDir = os.path.join(processSIPDirectory, transfer_name) + "/"
        lookup_path = destSIPDir.replace(sharedPath, '%sharedPath%')
        databaseFunctions.createSIP(lookup_path, sipUUID)

        #move the objects to the SIPDir
        for item in os.listdir(objectsDirectory):
            shutil.move(os.path.join(objectsDirectory, item),
                        os.path.join(tmpSIPDir, "objects", item))

        #get the database list of files in the objects directory
        #for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
        # transfer_uuid comes from the request, so it is escaped like the path
        # in the UPDATE below. '\\%' keeps the literal backslash that makes
        # MySQL's LIKE treat '%' as a plain character.
        sql = """SELECT  fileUUID, currentLocation FROM Files WHERE removedTime = 0 AND currentLocation LIKE '\\%transferDirectory\\%objects%' AND transferUUID =  '""" + MySQLdb.escape_string(transfer_uuid) + "'"
        for row in databaseInterface.queryAllSQL(sql):
            fileUUID = row[0]
            currentPath = databaseFunctions.deUnicode(row[1])
            currentSIPFilePath = currentPath.replace("%transferDirectory%",
                                                     tmpSIPDir)
            if os.path.isfile(currentSIPFilePath):
                newLocation = currentPath.replace("%transferDirectory%",
                                                  "%SIPDirectory%")
                sql = """UPDATE Files SET currentLocation='%s', sipUUID='%s' WHERE fileUUID='%s'""" % (
                    MySQLdb.escape_string(newLocation), sipUUID, fileUUID)
                databaseInterface.runSQL(sql)
            else:
                print >> sys.stderr, "file not found: ", currentSIPFilePath

        #copy processingMCP.xml file
        src = os.path.join(os.path.dirname(objectsDirectory[:-1]),
                           "processingMCP.xml")
        dst = os.path.join(tmpSIPDir, "processingMCP.xml")
        shutil.copy(src, dst)

        #moveSIPTo processSIPDirectory
        shutil.move(tmpSIPDir, destSIPDir)

        elasticSearchFunctions.connect_and_change_transfer_file_status(
            transfer_uuid, '')

        response['message'] = 'SIP ' + sipUUID + ' created.'
    else:
        response['error'] = True
        response['message'] = 'Must be logged in.'

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json')
Example #15
0
def call(jobs):
    """Build a SIP from each job's transfer and queue it for ingest.

    All jobs are processed inside a single database transaction. For every
    job the function:

    1. creates a structured SIP directory under the processing directory,
    2. finds or creates the ``SIP`` row for the destination path,
    3. moves the transfer's objects into the SIP and re-points the matching
       ``Directory`` and ``File`` rows at the SIP,
    4. copies ``dc.json`` (if present) and ``processingMCP.xml``,
    5. moves the finished SIP directory to the auto-process directory.

    :param jobs: iterable of job objects. Each must provide ``JobContext()``,
        ``pyprint()`` and ``args``, where ``args[1:7]`` are, in order: the
        objects directory, transfer name, transfer UUID, processing
        directory, auto-process SIP directory and shared path.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                objectsDirectory = job.args[1]
                transferName = job.args[2]
                transferUUID = job.args[3]
                processingDirectory = job.args[4]
                autoProcessSIPDirectory = job.args[5]
                sharedPath = job.args[6]
                sipName = transferName

                tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
                destSIPDir = os.path.join(autoProcessSIPDirectory,
                                          sipName) + "/"
                archivematicaFunctions.create_structured_directory(
                    tmpSIPDir, manual_normalization=False)

                # If transfer is a reingested AIP, then pass that info to the SIP
                sip_type = 'SIP'
                sip_uuid = None
                transfer = Transfer.objects.get(uuid=transferUUID)
                if transfer.type == 'Archivematica AIP':
                    sip_type = 'AIP-REIN'
                    # Use reingested AIP's UUID as the SIP UUID
                    # Get AIP UUID from reingest METS name
                    metadata_dir = os.path.join(objectsDirectory, '..',
                                                'metadata')
                    # List the directory once and reuse it for both the log
                    # line and the METS search.
                    metadata_items = os.listdir(metadata_dir)
                    job.pyprint('path', metadata_dir, 'listdir',
                                metadata_items)
                    for item in metadata_items:
                        if item.startswith('METS'):
                            sip_uuid = item.replace('METS.',
                                                    '').replace('.xml', '')
                job.pyprint('sip_uuid', sip_uuid)
                job.pyprint('sip_type', sip_type)

                # Find out if any ``Directory`` models were created for the source
                # ``Transfer``. If so, this fact gets recorded in the new ``SIP`` model.
                dir_mdls = Directory.objects.filter(
                    transfer_id=transferUUID,
                    currentlocation__startswith='%transferDirectory%objects')
                diruuids = len(dir_mdls) > 0

                # Create row in SIPs table if one doesn't already exist
                lookup_path = destSIPDir.replace(sharedPath, '%sharedPath%')
                try:
                    # Keep the model instance (not its ``uuid``): it is
                    # mutated and saved just below and later assigned to the
                    # ``sip`` relation of the Directory/File rows.
                    sip = SIP.objects.get(currentpath=lookup_path)
                    if diruuids:
                        sip.diruuids = True
                        sip.save()
                except SIP.DoesNotExist:
                    sip_uuid = databaseFunctions.createSIP(lookup_path,
                                                           UUID=sip_uuid,
                                                           sip_type=sip_type,
                                                           diruuids=diruuids,
                                                           printfn=job.pyprint)
                    sip = SIP.objects.get(uuid=sip_uuid)

                # Move the objects to the SIPDir
                for item in os.listdir(objectsDirectory):
                    src_path = os.path.join(objectsDirectory, item)
                    dst_path = os.path.join(tmpSIPDir, "objects", item)
                    # If dst_path already exists and is a directory, shutil.move
                    # will move src_path into it rather than overwriting it;
                    # to avoid incorrectly-nested paths, move src_path's contents
                    # into it instead.
                    if os.path.exists(dst_path) and os.path.isdir(src_path):
                        for subitem in os.listdir(src_path):
                            shutil.move(os.path.join(src_path, subitem),
                                        dst_path)
                    else:
                        shutil.move(src_path, dst_path)

                # Get the ``Directory`` models representing the subdirectories in the
                # objects/ directory. For each subdirectory, confirm it's in the SIP
                # objects/ directory, and update the current location and owning SIP.
                for dir_mdl in dir_mdls:
                    currentPath = databaseFunctions.deUnicode(
                        dir_mdl.currentlocation)
                    currentSIPDirPath = currentPath.replace(
                        "%transferDirectory%", tmpSIPDir)
                    if os.path.isdir(currentSIPDirPath):
                        dir_mdl.currentlocation = currentPath.replace(
                            "%transferDirectory%", "%SIPDirectory%")
                        dir_mdl.sip = sip
                        dir_mdl.save()
                    else:
                        job.pyprint("directory not found: ",
                                    currentSIPDirPath,
                                    file=sys.stderr)

                # Get the database list of files in the objects directory.
                # For each file, confirm it's in the SIP objects directory, and
                # update the current location and owning SIP.
                files = File.objects.filter(
                    transfer_id=transferUUID,
                    currentlocation__startswith='%transferDirectory%objects',
                    removedtime__isnull=True)
                for f in files:
                    currentPath = databaseFunctions.deUnicode(
                        f.currentlocation)
                    currentSIPFilePath = currentPath.replace(
                        "%transferDirectory%", tmpSIPDir)
                    if os.path.isfile(currentSIPFilePath):
                        f.currentlocation = currentPath.replace(
                            "%transferDirectory%", "%SIPDirectory%")
                        f.sip = sip
                        f.save()
                    else:
                        job.pyprint("file not found: ",
                                    currentSIPFilePath,
                                    file=sys.stderr)

                archivematicaFunctions.create_directories(
                    archivematicaFunctions.MANUAL_NORMALIZATION_DIRECTORIES,
                    basepath=tmpSIPDir)

                # Copy the JSON metadata file, if present; this contains a
                # serialized copy of DC metadata entered in the dashboard UI
                # during the transfer.
                src = os.path.normpath(
                    os.path.join(objectsDirectory, "..", "metadata",
                                 "dc.json"))
                dst = os.path.join(tmpSIPDir, "metadata", "dc.json")
                if os.path.exists(src):
                    shutil.copy(src, dst)

                # Copy processingMCP.xml file. The ``[:-1]`` drops the last
                # character of objectsDirectory before taking the parent.
                # NOTE(review): presumably objectsDirectory ends with "/" -
                # confirm against the caller.
                src = os.path.join(os.path.dirname(objectsDirectory[:-1]),
                                   "processingMCP.xml")
                dst = os.path.join(tmpSIPDir, "processingMCP.xml")
                shutil.copy(src, dst)

                # moveSIPTo autoProcessSIPDirectory
                shutil.move(tmpSIPDir, destSIPDir)
    def __init__(self, jobChainLink, pk, unit):
        """Load a StandardTasksConfigs row and dispatch follow-up chain links.

        For every entry in ``unit.fileList`` that passes the configured
        filename filters, looks up the micro-service chain links matching the
        file's identified IDs and the command classification, starts each of
        them as a sub-job of ``jobChainLink``, and then reports this link as
        complete.

        jobChainLink -- the owning chain link; this method reads its
            ``passVar`` and ``UUID`` and calls ``jobChain.nextChainLink`` and
            ``linkProcessingComplete`` on it.
        pk -- primary key of the StandardTasksConfigs row to load.
        unit -- the unit being processed; this method reads ``currentPath``,
            ``fileList``, ``pathString`` and ``UUID`` and calls
            ``getReplacementDic``.
        """
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False
        # NOTE(review): the query is built by string concatenation and pk is
        # not quoted or escaped here.
        sql = """SELECT * FROM StandardTasksConfigs where pk = """ + pk.__str__()
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        # Columns are read by position (1-8). If several rows come back, the
        # values from the last one win.
        # NOTE(review): if no row comes back, none of the names below are
        # bound and the `if requiresOutputLock` check raises NameError.
        while row != None:
            filterFileEnd = deUnicode(row[1])
            filterFileStart = deUnicode(row[2])
            filterSubDir = deUnicode(row[3])
            requiresOutputLock = row[4]
            self.standardOutputFile = deUnicode(row[5])
            self.standardErrorFile = deUnicode(row[6])
            self.execute = deUnicode(row[7])
            self.arguments = deUnicode(row[8])
            row = c.fetchone()
        # The lock returned by querySQL is released once the cursor has been
        # drained.
        sqlLock.release()
        # There is no `global` statement in this method, so outputLock is a
        # local name here; nothing below reads it.
        if requiresOutputLock:
            outputLock = threading.Lock()
        else:
            outputLock = None

        # Computed but not used again in this method.
        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)

        # NOTE(review): no matching tasksLock.release() is visible in this
        # method - confirm where the lock is released.
        self.tasksLock.acquire()
        for file, fileUnit in unit.fileList.items():
            # Skip files that do not match the configured suffix, basename
            # prefix, or sub-directory filter (each applied only when set).
            #print "file:", file, fileUnit
            if filterFileEnd:
                if not file.endswith(filterFileEnd):
                    continue
            if filterFileStart:
                if not os.path.basename(file).startswith(filterFileStart):
                    continue
            if filterSubDir:
                #print "file", file, type(file)
                #print unit.pathString, type(unit.pathString)
                #filterSubDir = filterSubDir.encode('utf-8')
                #print filterSubDir, type(filterSubDir)

                if not file.startswith(unit.pathString + filterSubDir):
                    print "skipping file", file, filterSubDir
                    continue

            standardOutputFile = self.standardOutputFile
            standardErrorFile = self.standardErrorFile
            execute = self.execute
            arguments = self.arguments
            
            # Apply the substitutions passed in from the previous link, if any.
            if self.jobChainLink.passVar != None:
                if isinstance(self.jobChainLink.passVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(execute, arguments, standardOutputFile, standardErrorFile)

            # The UUID comes from `unit`, not from `fileUnit`, so it is the
            # same value on every iteration.
            fileUUID = unit.UUID
            # Uses the unsubstituted self.execute, not the local `execute`.
            ComandClassification = self.execute
            #passVar=self.jobChainLink.passVar
            # SECURITY NOTE(review): `arguments` comes from the database row
            # and is passed to eval(); the result is expected to be a dict
            # because .update() is called on it. Confirm the source is trusted.
            toPassVar = eval(arguments)
            toPassVar.update({"%standardErrorFile%":standardErrorFile, "%standardOutputFile%":standardOutputFile, '%commandClassifications%':ComandClassification})
            print "debug", toPassVar
            passVar=replacementDic(toPassVar)
            # Find chain links tied to this file's identified IDs for the
            # classification; taskType 8 is hard-coded in the query.
            sql = """SELECT MicroServiceChainLinks.pk FROM FilesIdentifiedIDs JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = 8 AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s';""" % (fileUUID, ComandClassification)
            rows = databaseInterface.queryAllSQL(sql)
            if rows and len(rows):
                print "DEBUGGING 6772: ", fileUUID, ComandClassification, rows
                for row in rows:
                     jobChainLink.jobChain.nextChainLink(row[0], passVar=passVar, incrementLinkSplit=True, subJobOf=self.jobChainLink.UUID)
            else:
                # No file-specific match: fall back to the default chain
                # link(s) registered for the classification.
                sql = """SELECT MicroserviceChainLink FROM DefaultCommandsForClassifications JOIN CommandClassifications ON CommandClassifications.pk = DefaultCommandsForClassifications.forClassification WHERE CommandClassifications.classification = '%s'""" % (ComandClassification)
                rows = databaseInterface.queryAllSQL(sql)
                print "DEBUGGING2 6772: ", fileUUID, ComandClassification, rows
                for row in rows:
                     jobChainLink.jobChain.nextChainLink(row[0], passVar=passVar, incrementLinkSplit=True, subJobOf=self.jobChainLink.UUID)
                
            # Called once per file that passes the filters (still inside the
            # loop), forwarding the incoming passVar unchanged.
            self.jobChainLink.linkProcessingComplete(self.exitCode, passVar=self.jobChainLink.passVar)
        if not os.path.isdir(containerPath):
            print >>sys.stderr, "file (not container) found: ", container
            continue
            
        sipName = "%s-%s" % (transferName, container) 
        
        tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
        destSIPDir =  os.path.join(autoProcessSIPDirectory, sipName) + "/"
        createStructuredDirectory(tmpSIPDir, createManualNormalizedDirectories=True)
        databaseFunctions.createSIP(destSIPDir.replace(sharedPath, '%sharedPath%'), sipUUID)
    
        #move the objects to the SIPDir
        for item in os.listdir(containerPath):
            shutil.move(os.path.join(containerPath, item), os.path.join(tmpSIPDir, "objects", item))
    
        #get the database list of files in the objects directory
        #for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
        sql = """SELECT  fileUUID, currentLocation FROM Files WHERE removedTime = 0 AND currentLocation LIKE '\%transferDirectory\%objects/""" + container + """/%' AND transferUUID =  '""" + transferUUID + "'"
        for row in databaseInterface.queryAllSQL(sql):
            fileUUID = row[0]
            currentPath = databaseFunctions.deUnicode(row[1]).replace('%transferDirectory%objects/' + container, '%transferDirectory%objects')
            currentSIPFilePath = currentPath.replace("%transferDirectory%", tmpSIPDir)
            if os.path.isfile(currentSIPFilePath):
                sql = """UPDATE Files SET currentLocation='%s', sipUUID='%s' WHERE fileUUID='%s'""" % (MySQLdb.escape_string(currentPath.replace("%transferDirectory%", "%SIPDirectory%")), sipUUID, fileUUID)
                databaseInterface.runSQL(sql)
            else:
                print >>sys.stderr, "file not found: ", currentSIPFilePath

        #moveSIPTo autoProcessSIPDirectory
        shutil.move(tmpSIPDir, destSIPDir)
    
    sharedPath = sys.argv[6]
    sipName = transferName
    sipUUID = uuid.uuid4().__str__()


    tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
    destSIPDir =  os.path.join(autoProcessSIPDirectory, sipName) + "/"
    createStructuredDirectory(tmpSIPDir)
    databaseFunctions.createSIP(destSIPDir.replace(sharedPath, '%sharedPath%'), sipUUID)

    #move the objects to the SIPDir
    for item in os.listdir(objectsDirectory):
        shutil.move(os.path.join(objectsDirectory, item), os.path.join(tmpSIPDir, "objects", item))

    #get the database list of files in the objects directory
    #for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
    sql = """SELECT  fileUUID, currentLocation FROM Files WHERE removedTime = 0 AND currentLocation LIKE '\%transferDirectory\%objects%' AND transferUUID =  '""" + transferUUID + "'"
    for row in databaseInterface.queryAllSQL(sql):
        fileUUID = row[0]
        currentPath = databaseFunctions.deUnicode(row[1])
        currentSIPFilePath = currentPath.replace("%transferDirectory%", tmpSIPDir)
        if os.path.isfile(currentSIPFilePath):
            sql = """UPDATE Files SET currentLocation='%s', sipUUID='%s' WHERE fileUUID='%s'""" % (MySQLdb.escape_string(currentPath.replace("%transferDirectory%", "%SIPDirectory%")), sipUUID, fileUUID)
            databaseInterface.runSQL(sql)
        else:
            print >>sys.stderr, "file not found: ", currentSIPFilePath


    #moveSIPTo autoProcessSIPDirectory
    shutil.move(tmpSIPDir, destSIPDir)
Example #19
0
def _adopt_into_sip(job, models, is_present, missing_msg, staging_dir, sip):
    """Re-point transfer-relative model rows at the SIP.

    For each model, the ``%transferDirectory%`` placeholder in its current
    location is resolved against ``staging_dir`` and checked with
    ``is_present``. Rows found on disk get a ``%SIPDirectory%``-relative
    location and ``sip`` as owner; rows not found are reported through
    ``job.pyprint`` on stderr and left untouched.
    """
    for mdl in models:
        transfer_location = databaseFunctions.deUnicode(mdl.currentlocation)
        staged = transfer_location.replace("%transferDirectory%", staging_dir)
        if not is_present(staged):
            job.pyprint(missing_msg, staged, file=sys.stderr)
            continue
        mdl.currentlocation = transfer_location.replace(
            "%transferDirectory%", "%SIPDirectory%")
        mdl.sip = sip
        mdl.save()


def call(jobs):
    """Turn each job's transfer into a SIP and move it to the ingest queue.

    Runs every job inside one database transaction. ``job.args[1:7]`` are
    read, in order, as: objects directory, transfer name, transfer UUID,
    processing directory, auto-process SIP directory and shared path.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                objects_dir = job.args[1]
                transfer_name = job.args[2]
                transfer_uuid = job.args[3]
                processing_dir = job.args[4]
                auto_process_dir = job.args[5]
                shared_path = job.args[6]

                # The SIP takes the transfer's name; it is staged under the
                # processing directory and moved to its destination last.
                sip_name = transfer_name
                staging_dir = os.path.join(processing_dir, sip_name) + "/"
                final_dir = os.path.join(auto_process_dir, sip_name) + "/"
                archivematicaFunctions.create_structured_directory(
                    staging_dir, manual_normalization=False)

                # A reingested AIP keeps its original UUID, which is taken
                # from the name of the reingest METS file.
                sip_type = "SIP"
                sip_uuid = None
                transfer = Transfer.objects.get(uuid=transfer_uuid)
                if transfer.type == "Archivematica AIP":
                    sip_type = "AIP-REIN"
                    metadata_dir = os.path.join(objects_dir, "..", "metadata")
                    job.pyprint("path", metadata_dir, "listdir",
                                os.listdir(metadata_dir))
                    for entry in os.listdir(metadata_dir):
                        if not entry.startswith("METS"):
                            continue
                        stripped = entry.replace("METS.", "")
                        sip_uuid = stripped.replace(".xml", "")
                job.pyprint("sip_uuid", sip_uuid)
                job.pyprint("sip_type", sip_type)

                # Whether the transfer has any ``Directory`` rows is recorded
                # on the SIP.
                dir_mdls = Directory.objects.filter(
                    transfer_id=transfer_uuid,
                    currentlocation__startswith="%transferDirectory%objects",
                )
                has_dir_rows = len(dir_mdls) > 0

                # Reuse the SIP row for this path if there is one, otherwise
                # create it.
                lookup_path = final_dir.replace(shared_path, "%sharedPath%")
                try:
                    sip = SIP.objects.get(currentpath=lookup_path)
                    if has_dir_rows:
                        sip.diruuids = True
                        sip.save()
                except SIP.DoesNotExist:
                    sip_uuid = databaseFunctions.createSIP(
                        lookup_path,
                        UUID=sip_uuid,
                        sip_type=sip_type,
                        diruuids=has_dir_rows,
                        printfn=job.pyprint,
                    )
                    sip = SIP.objects.get(uuid=sip_uuid)

                # Carry the transfer's activeAgent over to the SIP so that
                # events generated in Ingest can fall back to it when the
                # processing config needs no user interaction (e.g. the
                # "automated" config).
                try:
                    agent_var = UnitVariable.objects.get(
                        unittype="Transfer",
                        unituuid=transfer_uuid,
                        variable="activeAgent",
                    )
                except UnitVariable.DoesNotExist:
                    agent_var = None
                agent = None
                if agent_var:
                    try:
                        agent = Agent.objects.get(id=agent_var.variablevalue)
                    except Agent.DoesNotExist:
                        agent = None
                if agent is not None:
                    sip.update_active_agent(agent.userprofile.user_id)

                # Move the transfer's objects into the staged SIP.
                for entry in os.listdir(objects_dir):
                    source = os.path.join(objects_dir, entry)
                    target = os.path.join(staging_dir, "objects", entry)
                    if not os.path.exists(target) or not os.path.isdir(source):
                        shutil.move(source, target)
                        continue
                    # The target already exists and the source is a
                    # directory: shutil.move would nest the source inside it,
                    # so move the source's children across one by one.
                    for child in os.listdir(source):
                        shutil.move(os.path.join(source, child), target)

                # Directories first, then files: hand every row that made it
                # into the staged SIP over to the SIP.
                _adopt_into_sip(job, dir_mdls, os.path.isdir,
                                "directory not found: ", staging_dir, sip)
                _adopt_into_sip(
                    job,
                    File.objects.filter(
                        transfer_id=transfer_uuid,
                        currentlocation__startswith=(
                            "%transferDirectory%objects"),
                        removedtime__isnull=True,
                    ),
                    os.path.isfile,
                    "file not found: ",
                    staging_dir,
                    sip,
                )

                archivematicaFunctions.create_directories(
                    archivematicaFunctions.MANUAL_NORMALIZATION_DIRECTORIES,
                    basepath=staging_dir,
                )

                # dc.json holds the DC metadata entered in the dashboard
                # during the transfer; it is optional.
                dc_source = os.path.normpath(
                    os.path.join(objects_dir, "..", "metadata", "dc.json"))
                dc_target = os.path.join(staging_dir, "metadata", "dc.json")
                if os.path.exists(dc_source):
                    shutil.copy(dc_source, dc_target)

                # processingMCP.xml sits next to the objects directory.
                shutil.copy(
                    os.path.join(os.path.dirname(objects_dir[:-1]),
                                 "processingMCP.xml"),
                    os.path.join(staging_dir, "processingMCP.xml"),
                )

                # Finally move the staged SIP to the auto-process directory.
                shutil.move(staging_dir, final_dir)
        dst_path = os.path.join(tmpSIPDir, "objects", item)
        # If dst_path already exists and is a directory, shutil.move
        # will move src_path into it rather than overwriting it;
        # to avoid incorrectly-nested paths, move src_path's contents
        # into it instead.
        if os.path.exists(dst_path) and os.path.isdir(src_path):
            for subitem in os.listdir(src_path):
                shutil.move(os.path.join(src_path, subitem), dst_path)
        else:
            shutil.move(src_path, dst_path)

    # Get the ``Directory`` models representing the subdirectories in the
    # objects/ directory. For each subdirectory, confirm it's in the SIP
    # objects/ directory, and update the current location and owning SIP.
    for dir_mdl in dir_mdls:
        currentPath = databaseFunctions.deUnicode(dir_mdl.currentlocation)
        currentSIPDirPath = currentPath.replace("%transferDirectory%",
                                                tmpSIPDir)
        if os.path.isdir(currentSIPDirPath):
            dir_mdl.currentlocation = currentPath.replace(
                "%transferDirectory%", "%SIPDirectory%")
            dir_mdl.sip = sip
            dir_mdl.save()
        else:
            print("directory not found: ", currentSIPDirPath, file=sys.stderr)

    # Get the database list of files in the objects directory.
    # For each file, confirm it's in the SIP objects directory, and update the
    # current location/ owning SIP'
    files = File.objects.filter(
        transfer_id=transferUUID,
Example #21
0
        row = rows[0]
        sipUUID = row[0]
    else:
        sipUUID = uuid.uuid4().__str__()
        databaseFunctions.createSIP(lookup_path, sipUUID)

    #move the objects to the SIPDir
    for item in os.listdir(objectsDirectory):
        shutil.move(os.path.join(objectsDirectory, item), os.path.join(tmpSIPDir, "objects", item))

    #get the database list of files in the objects directory
    #for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
    sql = """SELECT  fileUUID, currentLocation FROM Files WHERE removedTime = 0 AND currentLocation LIKE '\%transferDirectory\%objects%' AND transferUUID =  '""" + transferUUID + "'"
    for row in databaseInterface.queryAllSQL(sql):
        fileUUID = row[0]
        currentPath = databaseFunctions.deUnicode(row[1])
        currentSIPFilePath = currentPath.replace("%transferDirectory%", tmpSIPDir)
        if os.path.isfile(currentSIPFilePath):
            sql = """UPDATE Files SET currentLocation='%s', sipUUID='%s' WHERE fileUUID='%s'""" % (MySQLdb.escape_string(currentPath.replace("%transferDirectory%", "%SIPDirectory%")), sipUUID, fileUUID)
            databaseInterface.runSQL(sql)
        else:
            print >>sys.stderr, "file not found: ", currentSIPFilePath

    for directory in createManualNormalizedDirectoriesList:
        path = os.path.join(tmpSIPDir, directory)
        if not os.path.isdir(path):
            os.makedirs(path)
    
    #copy processingMCP.xml file
    src = os.path.join(os.path.dirname(objectsDirectory[:-1]), "processingMCP.xml") 
    dst = os.path.join(tmpSIPDir, "processingMCP.xml")
        databaseFunctions.createSIP(
            destSIPDir.replace(sharedPath, '%sharedPath%'), sipUUID)

        # move the objects to the SIPDir
        for item in os.listdir(containerPath):
            shutil.move(os.path.join(containerPath, item),
                        os.path.join(tmpSIPDir, "objects", item))

        # get the database list of files in the objects directory
        # for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
        directory = os.path.join(transfer_objects_directory, container)
        files = File.objects.filter(removedtime__isnull=True,
                                    currentlocation__startswith=directory,
                                    transfer_id=transferUUID)
        for f in files:
            currentPath = databaseFunctions.deUnicode(
                f.currentlocation).replace(directory,
                                           transfer_objects_directory)
            currentSIPFilePath = currentPath.replace("%transferDirectory%",
                                                     tmpSIPDir)
            if os.path.isfile(currentSIPFilePath):
                f.currentlocation = currentPath.replace(
                    "%transferDirectory%", "%SIPDirectory%")
                f.sip_id = sipUUID
                f.save()
            else:
                print >> sys.stderr, "file not found: ", currentSIPFilePath

        # moveSIPTo autoProcessSIPDirectory
        shutil.move(tmpSIPDir, destSIPDir)
Example #23
0
    def __init__(self, jobChainLink, pk, unit):
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False
        sql = """SELECT * FROM StandardTasksConfigs where pk = '%s'""" % (
            pk.__str__())
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            filterFileEnd = deUnicode(row[1])
            filterFileStart = deUnicode(row[2])
            filterSubDir = deUnicode(row[3])
            requiresOutputLock = row[4]
            self.standardOutputFile = deUnicode(row[5])
            self.standardErrorFile = deUnicode(row[6])
            self.execute = deUnicode(row[7])
            self.arguments = deUnicode(row[8])
            row = c.fetchone()
        sqlLock.release()
        if requiresOutputLock:
            outputLock = threading.Lock()
        else:
            outputLock = None

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)

        SIPUUID = unit.owningUnit.UUID
        sql = """SELECT variableValue FROM UnitVariables WHERE unitType = 'SIP' AND variable = 'normalizationFileIdentificationToolIdentifierTypes' AND unitUUID = '%s';""" % (
            SIPUUID)
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows):
            fileIdentificationRestriction = rows[0][0]
        else:
            fileIdentificationRestriction = None

        self.tasksLock.acquire()
        for file, fileUnit in unit.fileList.items():
            #print "file:", file, fileUnit
            if filterFileEnd:
                if not file.endswith(filterFileEnd):
                    continue
            if filterFileStart:
                if not os.path.basename(file).startswith(filterFileStart):
                    continue
            if filterSubDir:
                #print "file", file, type(file)
                #print unit.pathString, type(unit.pathString)
                #filterSubDir = filterSubDir.encode('utf-8')
                #print filterSubDir, type(filterSubDir)

                if not file.startswith(unit.pathString + filterSubDir):
                    print "skipping file", file, filterSubDir, " :   \t Doesn't start with: ", unit.pathString + filterSubDir
                    continue

            standardOutputFile = self.standardOutputFile
            standardErrorFile = self.standardErrorFile
            execute = self.execute
            arguments = self.arguments

            if self.jobChainLink.passVar != None:
                if isinstance(self.jobChainLink.passVar, replacementDic):
                    execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(
                        execute, arguments, standardOutputFile,
                        standardErrorFile)

            fileUUID = unit.UUID
            ComandClassification = self.execute
            #passVar=self.jobChainLink.passVar
            toPassVar = eval(arguments)
            toPassVar.update({
                "%standardErrorFile%": standardErrorFile,
                "%standardOutputFile%": standardOutputFile,
                '%commandClassifications%': ComandClassification
            })
            #print "debug", toPassVar, toPassVar['%normalizeFileGrpUse%'], unit.fileGrpUse
            passVar = replacementDic(toPassVar)
            if toPassVar[
                    '%normalizeFileGrpUse%'] != unit.fileGrpUse or self.alreadyNormalizedManually(
                        unit, ComandClassification):
                #print "debug: ", unit.currentPath, unit.fileGrpUse
                self.jobChainLink.linkProcessingComplete(
                    self.exitCode, passVar=self.jobChainLink.passVar)
            else:
                taskType = databaseInterface.queryAllSQL(
                    "SELECT pk FROM TaskTypes WHERE description = '%s';" %
                    ("Transcoder task type"))[0][0]
                if fileIdentificationRestriction:
                    sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, CommandRelationships.command FROM FilesIdentifiedIDs JOIN FileIDs ON FilesIdentifiedIDs.fileID = FileIDs.pk JOIN FileIDTypes ON FileIDs.fileIDType = FileIDTypes.pk JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND (%s) AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE AND FileIDTypes.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % (
                        taskType, fileUUID, ComandClassification,
                        fileIdentificationRestriction)
                else:
                    sql = """SELECT MicroServiceChainLinks.pk, CommandRelationships.pk, CommandRelationships.command FROM FilesIdentifiedIDs JOIN CommandRelationships ON FilesIdentifiedIDs.fileID = CommandRelationships.fileID JOIN CommandClassifications ON CommandClassifications.pk = CommandRelationships.commandClassification JOIN TasksConfigs ON TasksConfigs.taskTypePKReference = CommandRelationships.pk JOIN MicroServiceChainLinks ON MicroServiceChainLinks.currentTask = TasksConfigs.pk WHERE TasksConfigs.taskType = '%s' AND FilesIdentifiedIDs.fileUUID = '%s' AND CommandClassifications.classification = '%s' AND CommandRelationships.enabled = TRUE AND CommandClassifications.enabled = TRUE GROUP BY MicroServiceChainLinks.pk;""" % (
                        taskType, fileUUID, ComandClassification)
                rows = databaseInterface.queryAllSQL(sql)

                commandsRun = {}
                if rows and len(rows):
                    for row in rows:
                        microServiceChainLink, commandRelationship, command = row
                        if command in commandsRun:
                            link = commandsRun[command]
                            sql = """SELECT exitCode FROM Tasks JOIN Jobs ON Jobs.jobUUID = Tasks.jobUUID WHERE Tasks.jobUUID IN (SELECT jobUUID FROM Jobs WHERE subJobOf = '%s') AND Jobs.MicroServiceChainLinksPK = '%s';""" % (
                                self.jobChainLink.UUID, link)
                            rows = databaseInterface.queryAllSQL(sql)
                            if len(rows) != 1:
                                print sys.stderr, "Bad query:", sql
                            for row in rows:
                                ret = row[0]
                                sql = "UPDATE CommandRelationships SET countAttempts=countAttempts+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                                if ret:
                                    column = "countNotOK"
                                else:
                                    column = "countOK"
                                sql = "UPDATE CommandRelationships SET " + column + "=" + column + "+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                        else:
                            commandsRun[command] = microServiceChainLink
                            jobChainLink.jobChain.nextChainLink(
                                row[0],
                                passVar=passVar,
                                incrementLinkSplit=True,
                                subJobOf=self.jobChainLink.UUID)
                else:
                    sql = """SELECT MicroserviceChainLink, CommandRelationships.pk, CommandRelationships.command FROM DefaultCommandsForClassifications JOIN MicroServiceChainLinks ON MicroServiceChainLinks.pk = DefaultCommandsForClassifications.MicroserviceChainLink  JOIN TasksConfigs ON TasksConfigs.pk = MicroServiceChainLinks.currentTask  JOIN CommandRelationships ON CommandRelationships.pk = TasksConfigs.taskTypePKReference JOIN CommandClassifications ON CommandClassifications.pk = DefaultCommandsForClassifications.forClassification WHERE TasksConfigs.taskType = '5e70152a-9c5b-4c17-b823-c9298c546eeb' AND CommandClassifications.classification = '%s' AND DefaultCommandsForClassifications.enabled = TRUE;""" % (
                        ComandClassification)
                    rows = databaseInterface.queryAllSQL(sql)

                    for row in rows:
                        microServiceChainLink, commandRelationship, command = row

                        if command in commandsRun:
                            link = commandsRun[command]
                            sql = """SELECT exitCode FROM Tasks JOIN Jobs ON Jobs.jobUUID = Tasks.jobUUID WHERE Tasks.jobUUID IN (SELECT jobUUID FROM Jobs WHERE subJobOf = '%s') AND Jobs.MicroServiceChainLinksPK = '%s';""" % (
                                self.jobChainLink.UUID, link)
                            rows = databaseInterface.queryAllSQL(sql)
                            if len(rows) != 1:
                                print sys.stderr, "Bad query:", sql
                            for row in rows:
                                ret = row[0]
                                sql = "UPDATE CommandRelationships SET countAttempts=countAttempts+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                                if ret:
                                    column = "countNotOK"
                                else:
                                    column = "countOK"
                                sql = "UPDATE CommandRelationships SET " + column + "=" + column + "+1 WHERE pk='" + commandRelationship + "';"
                                databaseInterface.runSQL(sql)
                        else:
                            jobChainLink.jobChain.nextChainLink(
                                microServiceChainLink,
                                passVar=passVar,
                                incrementLinkSplit=True,
                                subJobOf=self.jobChainLink.UUID)
                            commandsRun[command] = microServiceChainLink
                self.jobChainLink.linkProcessingComplete(
                    self.exitCode, passVar=self.jobChainLink.passVar)
    def __init__(self, jobChainLink, pk, unit):
        """Prepare and start the task(s) for the command relationship ``pk``.

        The option dictionary handed to each task starts out as a set of
        ``%placeholder%`` strings. Every value is expanded, in this order,
        with ``jobChainLink.passVar`` (only when it is a ``replacementDic``),
        ``unit.getReplacementDic()`` and
        ``unit.getReplacementDic(unit.currentPath)``.

        One ``taskStandard`` is created for every row returned by the
        CommandRelationships query, registered in ``self.tasks`` under its
        UUID, logged through ``databaseFunctions.logTaskCreatedSQL`` and then
        started with ``performTask()``. If the query returns no rows the link
        is reported complete right away with ``self.exitCode``.

        Parameters:
            jobChainLink: owning chain link; its ``passVar`` is read and its
                ``linkProcessingComplete`` is called when there is no work.
            pk: primary key of the CommandRelationships row to run.
            unit: object providing ``getReplacementDic`` and ``currentPath``.
        """
        self.tasks = {}
        self.tasksLock = threading.Lock()
        self.pk = pk
        self.jobChainLink = jobChainLink
        self.exitCode = 0
        self.clearToNextLink = False

        opts = {
            "inputFile": "%relativeLocation%",
            "fileUUID": "%fileUUID%",
            'commandClassifications': '%commandClassifications%',
            "taskUUID": "%taskUUID%",
            "objectsDirectory": "%SIPObjectsDirectory%",
            "logsDirectory": "%SIPLogsDirectory%",
            "sipUUID": "%SIPUUID%",
            "sipPath": "%SIPDirectory%",
            "fileGrpUse": "%fileGrpUse%",
            "normalizeFileGrpUse": "%normalizeFileGrpUse%",
            "excludeDirectory": "%excludeDirectory%",
            "standardErrorFile": "%standardErrorFile%",
            "standardOutputFile": "%standardOutputFile%"
        }

        # isinstance() is False for None, so no separate None check is needed.
        passVar = self.jobChainLink.passVar
        applyPassVar = isinstance(passVar, replacementDic)

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
        # Fetched once and reused for every option and for task logging.
        # NOTE(review): assumes getReplacementDic() has no side effects and
        # returns equivalent content on each call - confirm.
        commandReplacementDic = unit.getReplacementDic()

        # Iterate over a snapshot of the keys because values are rewritten.
        for optsKey in list(opts):
            expanded = opts[optsKey]
            if applyPassVar:
                expanded = passVar.replace(expanded)[0]
            for key, value in commandReplacementDic.iteritems():
                expanded = expanded.replace(key, value)
            for key, value in SIPReplacementDic.iteritems():
                expanded = expanded.replace(key, value)
            opts[optsKey] = expanded

        # NOTE(review): the statement is built by string interpolation; this
        # is only safe while pk comes from a trusted, internal source.
        sql = """SELECT CommandRelationships.pk FROM CommandRelationships JOIN Commands ON CommandRelationships.command = Commands.pk WHERE CommandRelationships.pk = '%s';""" % (
            pk.__str__())
        rows = databaseInterface.queryAllSQL(sql)
        if not rows:
            # Nothing to run for this relationship: finish the link now.
            self.jobChainLink.linkProcessingComplete(self.exitCode)
            return

        # Keep the converted value; the original discarded deUnicode()'s
        # result, leaving the call without effect.
        execute = deUnicode("transcoder_cr%s" % (pk))
        standardOutputFile = opts["standardOutputFile"]
        standardErrorFile = opts["standardErrorFile"]
        self.standardOutputFile = standardOutputFile
        self.standardErrorFile = standardErrorFile
        self.execute = execute

        tasksList = []
        self.tasksLock.acquire()
        try:
            for row in rows:
                UUID = uuid.uuid4().__str__()
                # Each task gets its own copy so a later row cannot overwrite
                # the taskUUID of a task that was already created.
                taskOpts = dict(opts)
                taskOpts["taskUUID"] = UUID
                taskOpts["CommandRelationship"] = pk.__str__()
                arguments = str(row)
                self.arguments = arguments
                task = taskStandard(self,
                                    execute,
                                    taskOpts,
                                    standardOutputFile,
                                    standardErrorFile,
                                    outputLock=outputLock,
                                    UUID=UUID)
                self.tasks[UUID] = task
                databaseFunctions.logTaskCreatedSQL(self,
                                                    commandReplacementDic,
                                                    UUID, arguments)
                tasksList.append(task)
        finally:
            # Release even if task creation or logging raises.
            self.tasksLock.release()

        # Tasks are started only after the lock has been released.
        for task in tasksList:
            task.performTask()
    def __init__(self, jobChainLink, pk, unit):
        self.tasks = []
        self.pk = pk
        self.jobChainLink = jobChainLink
        sql = """SELECT * FROM StandardTasksConfigs where pk = '%s'""" % (
            pk.__str__())
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        if row == None:
            print >> sys.stderr, "\nfind me\n"
            traceback.print_exc(file=sys.stderr)
            return None
        while row != None:
            print row
            #pk = row[0]
            filterFileEnd = deUnicode(row[1])
            filterFileStart = deUnicode(row[2])
            filterSubDir = deUnicode(row[3])
            self.requiresOutputLock = deUnicode(row[4])
            standardOutputFile = deUnicode(row[5])
            standardErrorFile = deUnicode(row[6])
            execute = deUnicode(row[7])
            self.execute = execute
            arguments = deUnicode(row[8])
            row = c.fetchone()
        sqlLock.release()
        #if reloadFileList:
        #    unit.reloadFileList()

        #        "%taskUUID%": task.UUID.__str__(), \

        if filterSubDir:
            directory = os.path.join(unit.currentPath, filterSubDir)
        else:
            directory = unit.currentPath

        if self.jobChainLink.passVar != None:
            if isinstance(self.jobChainLink.passVar, list):
                for passVar in self.jobChainLink.passVar:
                    if isinstance(passVar, replacementDic):
                        execute, arguments, standardOutputFile, standardErrorFile = passVar.replace(
                            execute, arguments, standardOutputFile,
                            standardErrorFile)
            elif isinstance(self.jobChainLink.passVar, replacementDic):
                execute, arguments, standardOutputFile, standardErrorFile = self.jobChainLink.passVar.replace(
                    execute, arguments, standardOutputFile, standardErrorFile)

        commandReplacementDic = unit.getReplacementDic(directory)
        #for each key replace all instances of the key in the command string
        for key in commandReplacementDic.iterkeys():
            value = commandReplacementDic[key].replace("\"", ("\\\""))
            if execute:
                execute = execute.replace(key, value)
            if arguments:
                arguments = arguments.replace(key, value)
            if standardOutputFile:
                standardOutputFile = standardOutputFile.replace(key, value)
            if standardErrorFile:
                standardErrorFile = standardErrorFile.replace(key, value)

        UUID = uuid.uuid4().__str__()
        self.task = taskStandard(self,
                                 execute,
                                 arguments,
                                 standardOutputFile,
                                 standardErrorFile,
                                 UUID=UUID)
        databaseFunctions.logTaskCreatedSQL(self, commandReplacementDic, UUID,
                                            arguments)
        t = threading.Thread(target=self.task.performTask)
        t.daemon = True
        t.start()