Ejemplo n.º 1
0
    def __init__(self, jobChainLink, unit):
        """Build a one-task group from the link configuration and run it.

        The command settings are read from ``jobChainLink.link.config``.
        Pass-variable replacements and then shell-escaped unit replacements
        are substituted into the arguments and the stdout/stderr paths, and
        the resulting task is submitted with ``wants_output=True``.

        :param jobChainLink: chain link supplying ``link.config`` and
            ``passVar``.
        :param unit: unit supplying ``currentPath`` and
            ``getReplacementDic``.
        """
        super(linkTaskManagerGetMicroserviceGeneratedListInStdOut,
              self).__init__(jobChainLink, unit)
        link_config = self.jobChainLink.link.config
        subdir = link_config["filter_subdir"]
        stdout_path = link_config["stdout_file"]
        stderr_path = link_config["stderr_file"]
        command = link_config["execute"]
        args = link_config["arguments"]

        # Used by ``TaskGroup._log_task``.
        self.execute = link_config["execute"]

        # Work inside the filtered subdirectory when one is configured.
        target_dir = (
            os.path.join(unit.currentPath, subdir)
            if subdir else unit.currentPath
        )

        # Apply passvar replacement values.  ``passVar`` may be a single
        # object or a list; anything that is not a ReplacementDict
        # (including None) is ignored.
        link_pass_var = self.jobChainLink.passVar
        if isinstance(link_pass_var, list):
            substitutions = link_pass_var
        else:
            substitutions = [link_pass_var]
        for substitution in substitutions:
            if isinstance(substitution, ReplacementDict):
                args, stdout_path, stderr_path = substitution.replace(
                    args, stdout_path, stderr_path)

        # Apply unit (SIP/Transfer) replacement values, escaping every
        # value for the shell first.
        unit_replacements = unit.getReplacementDic(target_dir)
        for name, raw_value in unit_replacements.items():
            unit_replacements[name] = archivematicaFunctions.escapeForCommand(
                raw_value)
        args, stdout_path, stderr_path = unit_replacements.replace(
            args, stdout_path, stderr_path)

        task_group = TaskGroup(self, command)
        task_group.addTask(
            args,
            stdout_path,
            stderr_path,
            commandReplacementDic=unit_replacements,
            wants_output=True,
        )
        task_group.logTaskCreatedSQL()
        TaskGroupRunner.runTaskGroup(task_group, self.taskGroupFinished)
Ejemplo n.º 2
0
    def __init__(self, jobChainLink, pk, unit):
        """Configure and launch a single task for a unit directory.

        Loads the ``StandardTaskConfig`` row whose id is ``str(pk)``,
        substitutes pass-variable replacements and then shell-escaped unit
        replacements into the arguments and stdout/stderr paths, and submits
        a one-task ``TaskGroup`` to ``TaskGroupRunner``.

        :param jobChainLink: chain link supplying ``passVar``.
        :param pk: identifier of the ``StandardTaskConfig`` to load.
        :param unit: unit supplying ``currentPath`` and
            ``getReplacementDic``.
        """
        super(linkTaskManagerDirectories, self).__init__(jobChainLink, pk, unit)
        task_config = StandardTaskConfig.objects.get(id=str(pk))
        subdir = task_config.filter_subdir
        stdout_path = task_config.stdout_file
        stderr_path = task_config.stderr_file
        command = task_config.execute
        self.execute = command
        args = task_config.arguments

        # Work inside the filtered subdirectory when one is configured.
        target_dir = (
            os.path.join(unit.currentPath, subdir)
            if subdir else unit.currentPath
        )

        # Apply passvar replacement values.  ``passVar`` may be a single
        # object or a list; entries that are not ReplacementDict instances
        # (including None) are skipped.
        link_pass_var = self.jobChainLink.passVar
        candidates = link_pass_var if isinstance(link_pass_var, list) else [link_pass_var]
        for candidate in candidates:
            if not isinstance(candidate, ReplacementDict):
                continue
            args, stdout_path, stderr_path = candidate.replace(args, stdout_path, stderr_path)

        # Apply unit (SIP/Transfer) replacement values, escaping every
        # value for the shell first.
        unit_replacements = unit.getReplacementDic(target_dir)
        for name, raw_value in unit_replacements.items():
            unit_replacements[name] = archivematicaFunctions.escapeForCommand(raw_value)
        args, stdout_path, stderr_path = unit_replacements.replace(args, stdout_path, stderr_path)

        task_group = TaskGroup(self, command)
        task_group.addTask(args, stdout_path, stderr_path,
                           commandReplacementDic=unit_replacements)
        task_group.logTaskCreatedSQL()
        TaskGroupRunner.runTaskGroup(task_group, self.taskGroupFinished)
Ejemplo n.º 3
0
    def __init__(self, jobChainLink, pk, unit):
        """Create and dispatch batched task groups for the files of ``unit``.

        Loads the ``StandardTaskConfig`` row whose id is ``str(pk)``, filters
        ``unit.fileList`` by the configured file-name end, file-name start and
        subdirectory, and adds one task per remaining file to a ``TaskGroup``.
        A new group is started once the current one reports more than
        ``BATCH_SIZE`` tasks.  Every group is then handed to
        ``TaskGroupRunner`` with ``self.taskGroupFinished`` as the callback.
        If no group was created, the link is completed immediately with
        exit code 0.

        :param jobChainLink: chain link supplying ``reloadFileList``,
            ``passVar`` and ``linkProcessingComplete``.
        :param pk: identifier of the ``StandardTaskConfig`` to load.
        :param unit: unit supplying ``fileList``, ``currentPath``,
            ``pathString`` and ``getReplacementDic``.
        """
        super(linkTaskManagerFiles, self).__init__(jobChainLink, pk, unit)

        if jobChainLink.reloadFileList:
            unit.reloadFileList()

        # The list of task groups we'll be executing for this batch of files
        self.taskGroupsLock = threading.Lock()
        self.taskGroups = {}

        # Zero if every taskGroup executed so far has succeeded.  Otherwise,
        # something greater than zero.
        self.exitCode = 0

        self.clearToNextLink = False

        stc = StandardTaskConfig.objects.get(id=str(pk))
        # These three may be concatenated/compared with other strings,
        # so they need to be bytestrings here
        filterFileEnd = str(stc.filter_file_end) if stc.filter_file_end else ''
        filterFileStart = str(stc.filter_file_start) if stc.filter_file_start else ''
        filterSubDir = str(stc.filter_subdir) if stc.filter_subdir else ''
        self.standardOutputFile = stc.stdout_file
        self.standardErrorFile = stc.stderr_file
        self.execute = stc.execute
        self.arguments = stc.arguments

        # Single lock object passed to every task added below.
        outputLock = threading.Lock()

        # Check if filterSubDir has been overridden for this Transfer/SIP
        try:
            var = UnitVariable.objects.get(unittype=self.unit.unitType,
                                           unituuid=self.unit.UUID,
                                           variable=self.execute)
        except (UnitVariable.DoesNotExist, UnitVariable.MultipleObjectsReturned):
            var = None

        if var:
            try:
                variableValue = ast.literal_eval(var.variablevalue)
            except (SyntaxError, ValueError):
                # The stored value wasn't the expected dict literal:
                # SyntaxError for unparseable text, ValueError for text that
                # parses but is not a plain literal.  Keep the configured
                # filterSubDir in either case.
                pass
            else:
                filterSubDir = variableValue['filterSubDir']

        # The passvar type checks do not depend on the file being processed,
        # so resolve the applicable ReplacementDicts once up front.
        passVar = self.jobChainLink.passVar
        if isinstance(passVar, list):
            passVarDicts = [pv for pv in passVar if isinstance(pv, ReplacementDict)]
        elif isinstance(passVar, ReplacementDict):
            passVarDicts = [passVar]
        else:
            passVarDicts = []

        SIPReplacementDic = unit.getReplacementDic(unit.currentPath)
        # Escape all values for shell
        for key, value in SIPReplacementDic.items():
            SIPReplacementDic[key] = archivematicaFunctions.escapeForCommand(value)

        # Hold the lock via ``with`` so it is released even if building or
        # dispatching a task group raises.
        with self.taskGroupsLock:
            currentTaskGroup = None

            for filePath, fileUnit in unit.fileList.items():
                if filterFileEnd and not filePath.endswith(filterFileEnd):
                    continue
                if filterFileStart and not os.path.basename(filePath).startswith(filterFileStart):
                    continue
                if filterSubDir and not filePath.startswith(unit.pathString + filterSubDir):
                    continue

                standardOutputFile = self.standardOutputFile
                standardErrorFile = self.standardErrorFile
                arguments = self.arguments

                # Apply passvar replacement values
                for passVarDict in passVarDicts:
                    arguments, standardOutputFile, standardErrorFile = passVarDict.replace(
                        arguments, standardOutputFile, standardErrorFile)

                # Apply file replacement values
                commandReplacementDic = fileUnit.getReplacementDic()
                for key, value in commandReplacementDic.items():
                    # Escape values for shell
                    commandReplacementDic[key] = archivematicaFunctions.escapeForCommand(value)
                arguments, standardOutputFile, standardErrorFile = commandReplacementDic.replace(
                    arguments, standardOutputFile, standardErrorFile)

                # Apply unit (SIP/Transfer) replacement values
                arguments, standardOutputFile, standardErrorFile = SIPReplacementDic.replace(
                    arguments, standardOutputFile, standardErrorFile)

                # NOTE(review): with ``>`` a group can receive one more task
                # after count() reaches BATCH_SIZE -- confirm this is intended.
                if currentTaskGroup is None or currentTaskGroup.count() > BATCH_SIZE:
                    currentTaskGroup = TaskGroup(self, self.execute)
                    self.taskGroups[currentTaskGroup.UUID] = currentTaskGroup

                currentTaskGroup.addTask(
                    arguments, standardOutputFile, standardErrorFile,
                    outputLock, commandReplacementDic)

            # Record emptiness before dispatching anything, so the decision
            # below reflects whether any group was created.  Presumably the
            # taskGroupFinished callback mutates self.taskGroups (not visible
            # here -- verify); checking the dict after dispatch could then
            # mistake an already-finished batch for an empty one.
            batchIsEmpty = not self.taskGroups

            # Iterate over a snapshot of the groups being dispatched.
            for taskGroup in list(self.taskGroups.values()):
                taskGroup.logTaskCreatedSQL()
                TaskGroupRunner.runTaskGroup(taskGroup, self.taskGroupFinished)

            self.clearToNextLink = True

        # If the batch of files was empty, we can immediately proceed to the
        # next job in the chain.  Assume a successful status code.
        if batchIsEmpty:
            self.jobChainLink.linkProcessingComplete(0)
Ejemplo n.º 4
0
            workflow = load_workflow(workflow_file)
        except SchemaValidationError as err:
            logger.error("Workflow validation error: %s", err)
            sys.exit(1)

    dicts.setup(
        shared_directory=django_settings.SHARED_DIRECTORY,
        processing_directory=django_settings.PROCESSING_DIRECTORY,
        watch_directory=django_settings.WATCH_DIRECTORY,
        rejected_directory=django_settings.REJECTED_DIRECTORY,
    )

    created_shared_directory_structure()

    t = threading.Thread(target=debugMonitor)
    t.daemon = True
    t.start()

    t = threading.Thread(target=flushOutputs)
    t.daemon = True
    t.start()

    Executor.init()
    TaskGroupRunner.init()

    cleanupOldDbEntriesOnNewRun()
    watchDirectories(workflow)

    # This is blocking the main thread with the worker loop
    RPCServer.start(workflow)