Beispiel #1
0
    def proceedWithChoice(self, index, user_id):
        """Apply the choice at position ``index`` and complete this link.

        If ``user_id`` is truthy, the ``agent_id`` of the matching
        ``UserProfile`` is stored (as a string) in the unit's ``activeAgent``
        variable. The entry for this link is then removed from
        ``choicesAvailableForUnits``, the replacement dict of the selected
        choice is handed to ``update_passvar_replacement_dict`` and the link
        is reported as complete with exit code 0.

        :param index: position of the selected entry in ``self.choices``;
            converted with ``int()``.
        :param user_id: id of the user who made the choice; a falsy value
            leaves ``activeAgent`` untouched.
        """
        if user_id:
            agent_id = UserProfile.objects.get(user_id=int(user_id)).agent_id
            self.unit.setVariable("activeAgent", str(agent_id), None)

        # Release the lock in a ``finally`` clause so that an unexpected error
        # inside the critical section cannot leave it held forever.
        choicesAvailableForUnitsLock.acquire()
        try:
            del choicesAvailableForUnits[self.jobChainLink.UUID]
        except KeyError:
            # The entry was already removed; nothing left to do.
            pass
        finally:
            choicesAvailableForUnitsLock.release()

        # get the one at index, and go with it.
        _, _, replace_dict = self.choices[int(index)]
        rd = ReplacementDict.fromstring(replace_dict)
        self.update_passvar_replacement_dict(rd)
        self.jobChainLink.linkProcessingComplete(
            0, passVar=self.jobChainLink.passVar)
Beispiel #2
0
def get_replacement_dict(opts):
    """Generate values for all known %var% replacement variables.

    The output directory, prefix and postfix are chosen from
    ``opts.purpose`` ("preservation", "access" or "thumbnail"). Any other
    purpose is reported on stderr and ``None`` is returned.

    :param opts: object exposing ``file_path``, ``purpose``, ``task_uuid``,
        ``file_uuid`` and ``sip_path`` attributes.
    :returns: a ``ReplacementDict`` extended with the output-related
        variables, or ``None`` for an unsupported purpose.
    """
    # Split the input path into its directory and extension-less file name.
    directory, basename = os.path.split(opts.file_path)
    directory = directory + os.path.sep  # All paths should have trailing /
    filename = os.path.splitext(basename)[0]

    purpose = opts.purpose
    prefix = postfix = ""
    if "preservation" in purpose:
        output_dir = directory
        postfix = "-" + opts.task_uuid
    elif "access" in purpose:
        output_dir = os.path.join(opts.sip_path, "DIP", "objects") + os.path.sep
        prefix = opts.file_uuid + "-"
    elif "thumbnail" in purpose:
        output_dir = os.path.join(opts.sip_path, "thumbnails") + os.path.sep
        postfix = opts.file_uuid
    else:
        print("Unsupported command purpose", purpose, file=sys.stderr)
        return None

    # Populates the standard set of unit variables, so,
    # e.g., %fileUUID% is available
    replacements = ReplacementDict.frommodel(type_='file',
                                             file_=opts.file_uuid)

    # Neither the output file name nor the output file path carries the
    # extension.
    output_filename = "".join((prefix, filename, postfix))
    output_variables = {
        "%outputDirectory%": output_dir,
        "%prefix%": prefix,
        "%postfix%": postfix,
        "%outputFileName%": output_filename,
        "%outputFilePath%": os.path.join(output_dir, output_filename),
    }
    replacements.update(output_variables)
    return replacements
Beispiel #3
0
def test_replacementdict_model_constructor_transfer():
    """Check the variables ``frommodel`` produces for a transfer and a file."""
    location = TRANSFER.currentlocation
    rd = ReplacementDict.frommodel(type_='transfer', sip=TRANSFER, file_=FILE)

    # Transfer-specific variables
    expected_transfer_values = (
        ('%SIPUUID%', TRANSFER.uuid),
        ('%relativeLocation%', location),
        ('%currentPath%', location),
        ('%SIPDirectory%', location),
        ('%transferDirectory%', location),
        ('%SIPDirectoryBasename%', os.path.basename(location)),
        ('%SIPLogsDirectory%', os.path.join(location, 'logs/')),
        ('%SIPObjectsDirectory%', os.path.join(location, 'objects/')),
        # no, not actually relative
        ('%relativeLocation%', location),
    )
    for variable, expected in expected_transfer_values:
        assert rd[variable] == expected

    # File-specific variables
    expected_file_values = (
        ('%fileUUID%', FILE.uuid),
        ('%originalLocation%', FILE.originallocation),
        ('%currentLocation%', FILE.currentlocation),
        ('%fileGrpUse%', FILE.filegrpuse),
    )
    for variable, expected in expected_file_values:
        assert rd[variable] == expected
def test_replacementdict_model_constructor_sip():
    """Check the variables ``frommodel`` produces for a SIP and a file."""
    sip_path = SIP.currentpath
    rd = ReplacementDict.frommodel(type_="sip", sip=SIP, file_=FILE)

    # SIP-specific variables
    for variable, expected in (
        ("%SIPUUID%", SIP.uuid),
        ("%relativeLocation%", sip_path),
        ("%currentPath%", sip_path),
        ("%SIPDirectory%", sip_path),
    ):
        assert rd[variable] == expected

    # A SIP must not expose the transfer-only variable.
    assert "%transferDirectory%" not in rd

    for variable, expected in (
        ("%SIPDirectoryBasename%", os.path.basename(sip_path)),
        ("%SIPLogsDirectory%", os.path.join(sip_path, "logs/")),
        ("%SIPObjectsDirectory%", os.path.join(sip_path, "objects/")),
        ("%relativeLocation%", sip_path),
    ):
        assert rd[variable] == expected

    # File-specific variables
    for variable, expected in (
        ("%fileUUID%", FILE.uuid),
        ("%originalLocation%", FILE.originallocation),
        ("%currentLocation%", FILE.currentlocation),
        ("%fileGrpUse%", FILE.filegrpuse),
    ):
        assert rd[variable] == expected
Beispiel #5
0
    def getReplacementDic(self, target):
        """Return the replacement dictionary for this DIP unit.

        Starts from the dictionary ``ReplacementDict.frommodel`` builds for
        the SIP identified by ``self.UUID`` and then overrides the
        location-related variables.

        :param target: path used to compute ``%relativeLocation%``.
        :returns: the ``ReplacementDict`` with the DIP-specific overrides
            applied and ``%unitType%`` set to ``"DIP"``.
        """
        ret = ReplacementDict.frommodel(type_="sip", sip=self.UUID)

        # augment the dict here, because DIP is a special case whose paths are
        # not entirely based on data from the database - the locations need to
        # be overridden.
        shared_directory = django_settings.SHARED_DIRECTORY
        sip_directory = self.currentPath.replace(
            shared_directory, "%sharedPath%")
        relative_directory_location = target.replace(
            shared_directory, "%sharedPath%")

        ret["%SIPLogsDirectory%"] = os.path.join(sip_directory, "logs", "")
        ret["%SIPObjectsDirectory%"] = os.path.join(sip_directory, "objects",
                                                    "")
        ret["%SIPDirectory%"] = sip_directory
        # The key needs its closing "%"; without it the override is stored
        # under a different name and never replaces %SIPDirectoryBasename%.
        ret["%SIPDirectoryBasename%"] = os.path.basename(
            os.path.abspath(sip_directory))
        ret["%relativeLocation%"] = target.replace(
            self.currentPath, relative_directory_location, 1)
        ret["%unitType%"] = "DIP"
        return ret
Beispiel #6
0
    def getReplacementDic(self, target):
        """Return the replacement dictionary for this DIP unit.

        Starts from the dictionary ``ReplacementDict.frommodel`` builds for
        the SIP identified by ``self.UUID`` and then overrides the
        location-related variables.

        :param target: path used to compute ``%relativeLocation%``.
        :returns: the ``ReplacementDict`` with the DIP-specific overrides
            applied and ``%unitType%`` set to ``"DIP"``.
        """
        ret = ReplacementDict.frommodel(type_='sip', sip=self.UUID)

        # augment the dict here, because DIP is a special case whose paths are
        # not entirely based on data from the database - the locations need to
        # be overridden.
        shared_directory = archivematicaMCP.config.get(
            'MCPServer', "sharedDirectory")
        sip_directory = self.currentPath.replace(
            shared_directory, "%sharedPath%")
        relative_directory_location = target.replace(
            shared_directory, "%sharedPath%")

        ret["%SIPLogsDirectory%"] = os.path.join(sip_directory, "logs", "")
        ret["%SIPObjectsDirectory%"] = os.path.join(sip_directory, "objects",
                                                    "")
        ret["%SIPDirectory%"] = sip_directory
        # The key needs its closing "%"; without it the override is stored
        # under a different name and never replaces %SIPDirectoryBasename%.
        ret["%SIPDirectoryBasename%"] = os.path.basename(
            os.path.abspath(sip_directory))
        ret["%relativeLocation%"] = target.replace(
            self.currentPath, relative_directory_location, 1)
        ret["%unitType%"] = "DIP"
        return ret
Beispiel #7
0
def main(job, file_path, file_uuid, sip_uuid):
    """Run the characterization commands that apply to a file.

    Nothing is done when ``FPCommandOutput`` already has rows for the file.
    Otherwise the active "characterization" rules for the file's format are
    executed, falling back to the "default_characterization" rules when
    there are none. Output of commands whose output format is fmt/101 is
    stored through ``insertIntoFPCommandOutput`` if it parses as XML.

    :returns: 0 on success (or when already characterized), 255 if any
        command failed or produced invalid XML.
    """
    setup_dicts(mcpclient_settings)

    failed = False

    # Check to see whether the file has already been characterized; don't try
    # to characterize it a second time if so.
    if FPCommandOutput.objects.filter(file_id=file_uuid).count() > 0:
        return 0

    try:
        format_version = FormatVersion.active.get(
            fileformatversion__file_uuid=file_uuid)
    except FormatVersion.DoesNotExist:
        rules = format_version = None

    if format_version:
        rules = FPRule.active.filter(format=format_version.uuid,
                                     purpose="characterization")

    # Characterization always occurs - if nothing is specified, get one or more
    # defaults specified in the FPR.
    if not rules:
        rules = FPRule.active.filter(purpose="default_characterization")

    for rule in rules:
        command = rule.command

        if command.script_type in ("bashScript", "command"):
            # Shell-style commands get their variables substituted inline.
            args = []
            command_to_execute = replace_string_values(command.command,
                                                       file_=file_uuid,
                                                       sip=sip_uuid,
                                                       type_="file")
        else:
            # Other script types receive the variables as GNU-style options.
            replacements = ReplacementDict.frommodel(file_=file_uuid,
                                                     sip=sip_uuid,
                                                     type_="file")
            args = replacements.to_gnu_options()
            command_to_execute = command.command

        exitstatus, stdout, stderr = executeOrRun(
            command.script_type,
            command_to_execute,
            arguments=args,
            capture_output=True,
        )

        job.write_output(stdout)
        job.write_error(stderr)

        if exitstatus != 0:
            failure_message = "Command {} failed with exit status {}; stderr:"
            job.write_error(
                failure_message.format(command.description, exitstatus))
            failed = True
            continue

        # fmt/101 is XML - we want to collect and package any XML output, while
        # allowing other commands to execute without actually collecting their
        # output in the event that they are writing their output to disk.
        # FPCommandOutput can have multiple rows for a given file,
        # distinguished by the rule that produced it.
        output_format = command.output_format
        if not (output_format and output_format.pronom_id == "fmt/101"):
            not_xml_message = (
                'Tool output for command "{}" ({}) is not XML; not saving to database'
            )
            job.write_error(
                not_xml_message.format(command.description, command.uuid))
            continue

        try:
            etree.fromstring(stdout)
            insertIntoFPCommandOutput(file_uuid, stdout, rule.uuid)
            saved_message = 'Saved XML output for command "{}" ({})'
            job.write_output(
                saved_message.format(command.description, command.uuid))
        except etree.XMLSyntaxError:
            failed = True
            invalid_message = (
                'XML output for command "{}" ({}) was not valid XML; not saving to database'
            )
            job.write_error(
                invalid_message.format(command.description, command.uuid))

    return 255 if failed else 0
Beispiel #8
0
def main(job, task_uuid, file_uuid):
    """Run the transcription rules for a file and record what they produce.

    Only files whose ``filegrpuse`` is "original" are processed. When no
    rules exist for the original, ``fetch_rules_for_derivatives`` is asked
    for a derivative (and its rules) to transcribe instead. For every rule
    the command is executed, a transcription event is inserted and, if the
    expected output file exists, it is inserted into the database.

    :param job: object providing ``print_error``, ``write_output`` and
        ``write_error``.
    :param task_uuid: passed on to ``insert_file_into_database``.
    :param file_uuid: UUID of the file to transcribe.
    :returns: 0 if nothing had to be done or every command exited with
        status 0, otherwise 1.
    """
    setup_dicts(mcpclient_settings)

    succeeded = True

    file_ = File.objects.get(uuid=file_uuid)

    # Normally we don't transcribe derivatives (access copies, preservation copies);
    # however, some useful transcription tools can't handle some formats that
    # are common as the primary copies. For example, tesseract can't handle JPEG2000.
    # If there are no rules for the primary format passed in, try to look at each
    # derivative until a transcribable derivative is found.
    #
    # Skip derivatives to avoid double-scanning them; only look at them as a fallback.
    if file_.filegrpuse != "original":
        job.print_error(
            "{} is not an original; not transcribing".format(file_uuid))
        return 0

    rules = fetch_rules_for(file_)
    if not rules:
        file_, rules = fetch_rules_for_derivatives(file_)

    if not rules:
        job.print_error(
            "No rules found for file {} and its derivatives; not transcribing".
            format(file_uuid))
        return 0

    if file_.filegrpuse == "original":
        noun = "original"
    else:
        noun = file_.filegrpuse + " derivative"
    job.print_error("Transcribing {} {}".format(noun, file_.uuid))

    rd = ReplacementDict.frommodel(file_=file_, type_="file")

    for rule in rules:
        script = rule.command.command
        if rule.command.script_type in ("bashScript", "command"):
            # Shell-style commands get their variables substituted inline.
            script, = rd.replace(script)
            args = []
        else:
            # Call the method: the command needs the list of options, not
            # the bound method object itself.
            args = rd.to_gnu_options()

        exitstatus, stdout, stderr = executeOrRun(rule.command.script_type,
                                                  script,
                                                  arguments=args,
                                                  capture_output=True)
        job.write_output(stdout)
        job.write_error(stderr)
        if exitstatus != 0:
            succeeded = False

        # The event is recorded for failed commands as well; it receives the
        # exit status.
        output_path = rd.replace(rule.command.output_location)[0]
        relative_path = output_path.replace(rd["%SIPDirectory%"],
                                            "%SIPDirectory%")
        event = insert_transcription_event(exitstatus, file_uuid, rule,
                                           relative_path)

        if os.path.isfile(output_path):
            insert_file_into_database(
                task_uuid,
                file_uuid,
                rd["%SIPUUID%"],
                event,
                rule,
                output_path,
                relative_path,
            )

    return 0 if succeeded else 1
Beispiel #9
0
    def checkForPreconfiguredXML(self):
        """Look up a pre-configured answer for this choice point.

        Reads the processing XML file from the unit's current directory and
        searches its ``preconfiguredChoice`` elements for one whose
        ``appliesTo`` matches this link. For a match, the ``replacementdic``
        of the corresponding ``MicroServiceChoiceReplacementDic`` row is
        used. If the choice carries a ``delay`` element whose ``unitCtime``
        attribute is set and is not "no", completion of the link is scheduled
        on a timer instead.

        :returns: ``None`` when no pre-configured choice applies, the
            replacement dict value of the matching choice, or
            ``waitingOnTimer`` when completion was scheduled on a timer.
        """
        ret = None
        unit_path = self.unit.currentPath.replace(
            "%sharedPath%", django_settings.SHARED_DIRECTORY, 1)
        xmlFilePath = os.path.join(unit_path + "/",
                                   django_settings.PROCESSING_XML_FILE)

        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                this_choice_point = choice_unifier.get(self.jobChainLink.pk,
                                                       self.jobChainLink.pk)
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.findall(
                        ".//preconfiguredChoice"):
                    if preconfiguredChoice.find(
                            "appliesTo").text == this_choice_point:
                        desiredChoice = preconfiguredChoice.find(
                            "goToChain").text
                        desiredChoice = choice_unifier.get(
                            desiredChoice, desiredChoice)
                        dic = MicroServiceChoiceReplacementDic.objects.get(
                            id=desiredChoice,
                            choiceavailableatlink=this_choice_point)
                        ret = dic.replacementdic
                        try:
                            # Expected shape:
                            # <delay unitCtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            unitAtimeXML = None
                            # Compare against None explicitly: an element
                            # without child elements is falsy, so a plain
                            # truth test would skip every <delay> element.
                            if delayXML is not None:
                                unitAtimeXML = delayXML.get("unitCtime")
                            if unitAtimeXML is not None and unitAtimeXML.lower(
                            ) != "no":
                                delaySeconds = int(delayXML.text)
                                # The delay is measured from the modification
                                # time of the unit's directory.
                                unitTime = os.path.getmtime(unit_path)
                                nowTime = time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                LOGGER.info('Time to go: %s', timeToGo)
                                self.jobChainLink.setExitMessage(
                                    "Waiting till: " +
                                    datetime.datetime.fromtimestamp(
                                        (nowTime + timeToGo)).ctime())
                                rd = ReplacementDict.fromstring(ret)
                                if self.jobChainLink.passVar is not None:
                                    if isinstance(self.jobChainLink.passVar,
                                                  ReplacementDict):
                                        # Values of the chosen replacement
                                        # dict win over those already in the
                                        # pass variable.
                                        new = {}
                                        new.update(
                                            self.jobChainLink.passVar.dic)
                                        new.update(rd.dic)
                                        rd.dic = new
                                t = threading.Timer(
                                    timeToGo,
                                    self.jobChainLink.linkProcessingComplete,
                                    args=[0, rd],
                                    kwargs={})
                                t.daemon = True
                                t.start()

                                t2 = threading.Timer(
                                    timeToGo,
                                    self.jobChainLink.setExitMessage,
                                    args=[Job.STATUS_COMPLETED_SUCCESSFULLY],
                                    kwargs={})
                                t2.start()
                                return waitingOnTimer

                        except Exception:
                            LOGGER.info('Error parsing XML', exc_info=True)

            except Exception:
                LOGGER.warning(
                    'Error parsing xml at %s for pre-configured choice',
                    xmlFilePath,
                    exc_info=True)
        return ret
Beispiel #10
0
 def getReplacementDic(self, target):
     """Return the transfer replacement dict tagged with this unit's type.

     ``target`` is accepted but not used.
     """
     replacements = ReplacementDict.frommodel(sip=self.UUID, type_="transfer")
     replacements["%unitType%"] = self.unitType
     return replacements
Beispiel #11
0
def test_replacementdict_replace():
    """``replace`` substitutes a variable and returns a list of strings."""
    replacements = ReplacementDict({"%PREFIX%": "/usr/local"})
    replaced = replacements.replace("%PREFIX%/bin/")
    assert replaced == ["/usr/local/bin/"]
Beispiel #12
0
def test_replacementdict_options():
    """``to_gnu_options`` renders a variable as a ``--long-option=value``."""
    replacements = ReplacementDict({'%relativeLocation%': 'bar'})
    options = replacements.to_gnu_options()
    assert options == ['--relative-location=bar']
def test_replacementdict_options():
    """``to_gnu_options`` renders a variable as a ``--long-option=value``."""
    variables = {"%relativeLocation%": "bar"}
    gnu_options = ReplacementDict(variables).to_gnu_options()
    assert gnu_options == ["--relative-location=bar"]
Beispiel #14
0
    def checkForPreconfiguredXML(self):
        """Look up a pre-configured answer for this choice point.

        Reads the processing XML file from the unit's current directory and
        searches its ``preconfiguredChoice`` elements for one whose
        ``appliesTo`` matches this link. For a match, the replacement with
        the desired id is looked up in the workflow link's
        ``config["replacements"]`` and its items are formatted with
        ``self._format_items``. If the choice carries a ``delay`` element
        whose ``unitCtime`` attribute is set and is not "no", completion of
        the link is scheduled on a timer instead.

        :returns: ``None`` when no pre-configured choice applies (including
            when the link or the replacement cannot be found), the formatted
            replacement items of the matching choice, or ``waitingOnTimer``
            when completion was scheduled on a timer.
        """
        ret = None
        unit_path = self.unit.currentPath.replace(
            "%sharedPath%", django_settings.SHARED_DIRECTORY, 1
        )
        xmlFilePath = os.path.join(
            unit_path + "/",
            django_settings.PROCESSING_XML_FILE,
        )

        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                this_choice_point = choice_unifier.get(
                    self.jobChainLink.pk, self.jobChainLink.pk
                )
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.findall(".//preconfiguredChoice"):
                    if preconfiguredChoice.find("appliesTo").text == this_choice_point:
                        desiredChoice = preconfiguredChoice.find("goToChain").text
                        desiredChoice = choice_unifier.get(desiredChoice, desiredChoice)

                        try:
                            link = self.jobChainLink.workflow.get_link(
                                this_choice_point
                            )
                        except KeyError:
                            return
                        for replacement in link.config["replacements"]:
                            if replacement["id"] == desiredChoice:
                                # In our JSON-encoded document, the items in
                                # the replacements are not wrapped, do it here.
                                # Needed by ReplacementDict.
                                ret = self._format_items(replacement["items"])
                                break
                        else:
                            # No replacement carries the desired id.
                            return

                        try:
                            # Expected shape:
                            # <delay unitCtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            unitAtimeXML = None
                            # Compare against None explicitly: an element
                            # without child elements is falsy, so a plain
                            # truth test would skip every <delay> element.
                            if delayXML is not None:
                                unitAtimeXML = delayXML.get("unitCtime")
                            if (
                                unitAtimeXML is not None
                                and unitAtimeXML.lower() != "no"
                            ):
                                delaySeconds = int(delayXML.text)
                                # The delay is measured from the modification
                                # time of the unit's directory.
                                unitTime = os.path.getmtime(unit_path)
                                nowTime = time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                LOGGER.info("Time to go: %s", timeToGo)
                                self.jobChainLink.setExitMessage(
                                    "Waiting till: "
                                    + datetime.datetime.fromtimestamp(
                                        (nowTime + timeToGo)
                                    ).ctime()
                                )
                                rd = ReplacementDict(ret)
                                if self.jobChainLink.passVar is not None:
                                    if isinstance(
                                        self.jobChainLink.passVar, ReplacementDict
                                    ):
                                        # Values of the chosen replacement
                                        # win over those already in the pass
                                        # variable.
                                        new = {}
                                        new.update(self.jobChainLink.passVar.dic)
                                        new.update(rd.dic)
                                        rd.dic = new
                                t = threading.Timer(
                                    timeToGo,
                                    self.jobChainLink.linkProcessingComplete,
                                    args=[0, rd],
                                    kwargs={},
                                )
                                t.daemon = True
                                t.start()

                                t2 = threading.Timer(
                                    timeToGo,
                                    self.jobChainLink.setExitMessage,
                                    args=[Job.STATUS_COMPLETED_SUCCESSFULLY],
                                    kwargs={},
                                )
                                t2.start()
                                return waitingOnTimer

                        except Exception:
                            LOGGER.info("Error parsing XML", exc_info=True)

            except Exception:
                LOGGER.warning(
                    "Error parsing xml at %s for pre-configured choice",
                    xmlFilePath,
                    exc_info=True,
                )
        return ret
Beispiel #15
0
    def checkForPreconfiguredXML(self):
        """Look up a pre-configured answer for this choice point.

        Reads the processing XML file from the unit's current directory and
        searches its ``preconfiguredChoice`` elements for one whose
        ``appliesTo`` equals this link's pk. For a match, the
        ``replacementdic`` of the corresponding
        ``MicroServiceChoiceReplacementDic`` row is used. If the choice
        carries a ``delay`` element whose ``unitCtime`` attribute is set and
        is not "no", completion of the link is scheduled on a timer instead.

        :returns: ``None`` when no pre-configured choice applies, the
            replacement dict value of the matching choice, or
            ``waitingOnTimer`` when completion was scheduled on a timer.
        """
        ret = None
        unit_path = self.unit.currentPath.replace(
            "%sharedPath%",
            archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1)
        xmlFilePath = os.path.join(
            unit_path + "/",
            archivematicaMCP.config.get('MCPServer', "processingXMLFile"))

        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.findall(
                        ".//preconfiguredChoice"):
                    if preconfiguredChoice.find(
                            "appliesTo").text == self.jobChainLink.pk:
                        desiredChoice = preconfiguredChoice.find(
                            "goToChain").text
                        dic = MicroServiceChoiceReplacementDic.objects.get(
                            id=desiredChoice,
                            choiceavailableatlink=self.jobChainLink.pk)
                        ret = dic.replacementdic
                        try:
                            # Expected shape:
                            # <delay unitCtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            unitAtimeXML = None
                            # A choice without a <delay> element is normal;
                            # guard against None instead of letting an
                            # AttributeError be logged as an XML error.
                            if delayXML is not None:
                                unitAtimeXML = delayXML.get("unitCtime")
                            if unitAtimeXML is not None and unitAtimeXML.lower(
                            ) != "no":
                                delaySeconds = int(delayXML.text)
                                # The delay is measured from the modification
                                # time of the unit's directory.
                                unitTime = os.path.getmtime(unit_path)
                                nowTime = time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                LOGGER.info('Time to go: %s', timeToGo)
                                self.jobChainLink.setExitMessage(
                                    "Waiting till: " +
                                    datetime.datetime.fromtimestamp(
                                        (nowTime + timeToGo)).ctime())
                                rd = ReplacementDict.fromstring(ret)
                                if self.jobChainLink.passVar is not None:
                                    if isinstance(self.jobChainLink.passVar,
                                                  ReplacementDict):
                                        # Values of the chosen replacement
                                        # dict win over those already in the
                                        # pass variable.
                                        new = {}
                                        new.update(
                                            self.jobChainLink.passVar.dic)
                                        new.update(rd.dic)
                                        rd.dic = new
                                t = threading.Timer(
                                    timeToGo,
                                    self.jobChainLink.linkProcessingComplete,
                                    args=[0, rd],
                                    kwargs={})
                                t.daemon = True
                                t.start()

                                t2 = threading.Timer(
                                    timeToGo,
                                    self.jobChainLink.setExitMessage,
                                    args=["Completed successfully"],
                                    kwargs={})
                                t2.start()
                                return waitingOnTimer

                        except Exception:
                            LOGGER.info('Error parsing XML', exc_info=True)

            except Exception:
                LOGGER.warning(
                    'Error parsing xml at %s for pre-configured choice',
                    xmlFilePath,
                    exc_info=True)
        return ret