def proceedWithChoice(self, index, user_id):
    """Apply the replacement dict chosen at ``index`` and complete this link.

    Args:
        index: Position of the selected entry in ``self.choices``. It is
            passed through ``int()``, so a numeric string is accepted.
        user_id: Identifier of the user who made the choice. When truthy,
            the agent id of the matching ``UserProfile`` is stored on the
            unit as the ``activeAgent`` variable.
    """
    if user_id:
        agent_id = UserProfile.objects.get(user_id=int(user_id)).agent_id
        self.unit.setVariable("activeAgent", str(agent_id), None)

    # Use the lock as a context manager so it is always released, even if
    # removing the entry fails with something other than KeyError.
    # NOTE(review): assumes choicesAvailableForUnitsLock supports ``with``
    # (e.g. threading.Lock/RLock) - confirm.
    with choicesAvailableForUnitsLock:
        try:
            del choicesAvailableForUnits[self.jobChainLink.UUID]
        except KeyError:
            # The entry is already gone; nothing to clean up.
            pass

    # get the one at index, and go with it.
    _, _, replace_dict = self.choices[int(index)]
    rd = ReplacementDict.fromstring(replace_dict)
    self.update_passvar_replacement_dict(rd)
    self.jobChainLink.linkProcessingComplete(
        0, passVar=self.jobChainLink.passVar)
def get_replacement_dict(opts):
    """
    Generates values for all known %var% replacement variables.

    The output directory, prefix and postfix are picked from ``opts.purpose``
    ("preservation", "access" or "thumbnail", matched as substrings). For any
    other purpose a message is printed to stderr and None is returned.
    """
    # Split the path into its directory and bare file name.
    parent_dir, base_name = os.path.split(opts.file_path)
    parent_dir = parent_dir + os.path.sep  # All paths should have trailing /
    stem, _extension = os.path.splitext(base_name)

    if "preservation" in opts.purpose:
        prefix = ""
        postfix = "-" + opts.task_uuid
        output_dir = parent_dir
    elif "access" in opts.purpose:
        prefix = opts.file_uuid + "-"
        postfix = ""
        output_dir = os.path.join(opts.sip_path, "DIP", "objects") + os.path.sep
    elif "thumbnail" in opts.purpose:
        prefix = ""
        output_dir = os.path.join(opts.sip_path, "thumbnails") + os.path.sep
        postfix = opts.file_uuid
    else:
        print("Unsupported command purpose", opts.purpose, file=sys.stderr)
        return None

    # Populates the standard set of unit variables, so,
    # e.g., %fileUUID% is available
    replacements = ReplacementDict.frommodel(type_='file', file_=opts.file_uuid)

    # Neither of the two output names below includes the extension.
    output_filename = ''.join((prefix, stem, postfix))
    output_values = {
        "%outputDirectory%": output_dir,
        "%prefix%": prefix,
        "%postfix%": postfix,
        "%outputFileName%": output_filename,
        "%outputFilePath%": os.path.join(output_dir, output_filename),
    }
    replacements.update(output_values)
    return replacements
def test_replacementdict_model_constructor_transfer():
    """Check the variables frommodel() produces for a transfer plus a file."""
    rd = ReplacementDict.frommodel(type_='transfer', file_=FILE, sip=TRANSFER)
    location = TRANSFER.currentlocation

    # Transfer-specific variables
    assert rd['%SIPUUID%'] == TRANSFER.uuid
    for location_key in (
        '%relativeLocation%',
        '%currentPath%',
        '%SIPDirectory%',
        '%transferDirectory%',
    ):
        assert rd[location_key] == location
    assert rd['%SIPDirectoryBasename%'] == os.path.basename(location)
    assert rd['%SIPLogsDirectory%'] == os.path.join(location, 'logs/')
    assert rd['%SIPObjectsDirectory%'] == os.path.join(location, 'objects/')
    # no, not actually relative
    assert rd['%relativeLocation%'] == location

    # File-specific variables
    assert rd['%fileUUID%'] == FILE.uuid
    assert rd['%originalLocation%'] == FILE.originallocation
    assert rd['%currentLocation%'] == FILE.currentlocation
    assert rd['%fileGrpUse%'] == FILE.filegrpuse
def test_replacementdict_model_constructor_sip():
    """Check the variables frommodel() produces for a SIP plus a file."""
    rd = ReplacementDict.frommodel(type_="sip", file_=FILE, sip=SIP)
    sip_path = SIP.currentpath

    # SIP-specific variables
    assert rd["%SIPUUID%"] == SIP.uuid
    for path_key in ("%relativeLocation%", "%currentPath%", "%SIPDirectory%"):
        assert rd[path_key] == sip_path
    # The transfer-only variable must be absent for a SIP.
    assert "%transferDirectory%" not in rd
    assert rd["%SIPDirectoryBasename%"] == os.path.basename(sip_path)
    assert rd["%SIPLogsDirectory%"] == os.path.join(sip_path, "logs/")
    assert rd["%SIPObjectsDirectory%"] == os.path.join(sip_path, "objects/")
    assert rd["%relativeLocation%"] == sip_path

    # File-specific variables
    assert rd["%fileUUID%"] == FILE.uuid
    assert rd["%originalLocation%"] == FILE.originallocation
    assert rd["%currentLocation%"] == FILE.currentlocation
    assert rd["%fileGrpUse%"] == FILE.filegrpuse
def getReplacementDic(self, target):
    """Return the replacement dict for this unit with DIP-specific overrides.

    Args:
        target: Path string; occurrences of the shared directory and of
            ``self.currentPath`` in it are rewritten to build
            ``%relativeLocation%``.

    Returns:
        The ``ReplacementDict`` built from the model for ``self.UUID``, with
        the directory variables and ``%unitType%`` overridden.
    """
    ret = ReplacementDict.frommodel(type_="sip", sip=self.UUID)

    # augment the dict here, because DIP is a special case whose paths are
    # not entirely based on data from the database - the locations need to
    # be overridden.
    shared_directory = django_settings.SHARED_DIRECTORY
    sip_directory = self.currentPath.replace(shared_directory, "%sharedPath%")
    relative_directory_location = target.replace(
        shared_directory, "%sharedPath%")

    ret["%SIPLogsDirectory%"] = os.path.join(sip_directory, "logs", "")
    ret["%SIPObjectsDirectory%"] = os.path.join(sip_directory, "objects", "")
    ret["%SIPDirectory%"] = sip_directory
    # The key needs its closing "%"; without it the override was stored under
    # a different name and never replaced the real variable.
    ret["%SIPDirectoryBasename%"] = os.path.basename(
        os.path.abspath(sip_directory))
    ret["%relativeLocation%"] = target.replace(
        self.currentPath, relative_directory_location, 1)
    ret["%unitType%"] = "DIP"
    return ret
def getReplacementDic(self, target):
    """Return the replacement dict for this unit with DIP-specific overrides.

    Args:
        target: Path string; occurrences of the configured shared directory
            and of ``self.currentPath`` in it are rewritten to build
            ``%relativeLocation%``.

    Returns:
        The ``ReplacementDict`` built from the model for ``self.UUID``, with
        the directory variables and ``%unitType%`` overridden.
    """
    ret = ReplacementDict.frommodel(type_='sip', sip=self.UUID)

    # augment the dict here, because DIP is a special case whose paths are
    # not entirely based on data from the database - the locations need to
    # be overridden.
    shared_directory = archivematicaMCP.config.get(
        'MCPServer', "sharedDirectory")
    sip_directory = self.currentPath.replace(shared_directory, "%sharedPath%")
    relative_directory_location = target.replace(
        shared_directory, "%sharedPath%")

    ret["%SIPLogsDirectory%"] = os.path.join(sip_directory, "logs", "")
    ret["%SIPObjectsDirectory%"] = os.path.join(sip_directory, "objects", "")
    ret["%SIPDirectory%"] = sip_directory
    # The key needs its closing "%"; without it the override was stored under
    # a different name and never replaced the real variable.
    ret["%SIPDirectoryBasename%"] = os.path.basename(
        os.path.abspath(sip_directory))
    ret["%relativeLocation%"] = target.replace(
        self.currentPath, relative_directory_location, 1)
    ret["%unitType%"] = "DIP"
    return ret
def main(job, file_path, file_uuid, sip_uuid):
    """Run the characterization commands for one file.

    Args:
        job: Job object; command output and errors are written to it.
        file_path: Not used by this function.
        file_uuid: UUID of the file to characterize.
        sip_uuid: UUID of the unit the file belongs to, used when building
            replacement values.

    Returns:
        0 when the file was already characterized or every command
        succeeded; 255 when a command exited non-zero or produced invalid
        XML where XML was expected.
    """
    setup_dicts(mcpclient_settings)

    failed = False

    # Check to see whether the file has already been characterized; don't try
    # to characterize it a second time if so.
    # exists() avoids counting every matching row just to test for presence.
    if FPCommandOutput.objects.filter(file_id=file_uuid).exists():
        return 0

    try:
        format_version = FormatVersion.active.get(
            fileformatversion__file_uuid=file_uuid)
    except FormatVersion.DoesNotExist:
        format_version = None

    rules = None
    if format_version:
        rules = FPRule.active.filter(format=format_version.uuid,
                                     purpose="characterization")

    # Characterization always occurs - if nothing is specified, get one or more
    # defaults specified in the FPR.
    if not rules:
        rules = FPRule.active.filter(purpose="default_characterization")

    for rule in rules:
        if rule.command.script_type in ("bashScript", "command"):
            # These script types get the values substituted into the
            # command text itself.
            args = []
            command_to_execute = replace_string_values(rule.command.command,
                                                       file_=file_uuid,
                                                       sip=sip_uuid,
                                                       type_="file")
        else:
            # Other script types receive the values as GNU-style options.
            rd = ReplacementDict.frommodel(file_=file_uuid,
                                           sip=sip_uuid,
                                           type_="file")
            args = rd.to_gnu_options()
            command_to_execute = rule.command.command

        exitstatus, stdout, stderr = executeOrRun(
            rule.command.script_type,
            command_to_execute,
            arguments=args,
            capture_output=True,
        )

        job.write_output(stdout)
        job.write_error(stderr)

        if exitstatus != 0:
            job.write_error(
                "Command {} failed with exit status {}; stderr:".format(
                    rule.command.description, exitstatus))
            failed = True
            continue

        # fmt/101 is XML - we want to collect and package any XML output, while
        # allowing other commands to execute without actually collecting their
        # output in the event that they are writing their output to disk.
        # FPCommandOutput can have multiple rows for a given file,
        # distinguished by the rule that produced it.
        if (rule.command.output_format
                and rule.command.output_format.pronom_id == "fmt/101"):
            try:
                # Parsed only to validate; the raw stdout is what gets saved.
                etree.fromstring(stdout)
                insertIntoFPCommandOutput(file_uuid, stdout, rule.uuid)
                job.write_output(
                    'Saved XML output for command "{}" ({})'.format(
                        rule.command.description, rule.command.uuid))
            except etree.XMLSyntaxError:
                failed = True
                job.write_error(
                    'XML output for command "{}" ({}) was not valid XML; not saving to database'
                    .format(rule.command.description, rule.command.uuid))
        else:
            job.write_error(
                'Tool output for command "{}" ({}) is not XML; not saving to database'
                .format(rule.command.description, rule.command.uuid))

    return 255 if failed else 0
def main(job, task_uuid, file_uuid):
    """Run the transcription commands for an original file.

    Args:
        job: Job object; command output and messages are written to it.
        task_uuid: UUID of the current task, passed on when a produced file
            is recorded.
        file_uuid: UUID of the file to transcribe.

    Returns:
        0 when nothing had to be done or every command exited with status 0;
        1 when at least one command exited non-zero.
    """
    setup_dicts(mcpclient_settings)

    succeeded = True

    file_ = File.objects.get(uuid=file_uuid)

    # Normally we don't transcribe derivatives (access copies, preservation copies);
    # however, some useful transcription tools can't handle some formats that
    # are common as the primary copies. For example, tesseract can't handle JPEG2000.
    # If there are no rules for the primary format passed in, try to look at each
    # derivative until a transcribable derivative is found.
    #
    # Skip derivatives to avoid double-scanning them; only look at them as a fallback.
    if file_.filegrpuse != "original":
        job.print_error(
            "{} is not an original; not transcribing".format(file_uuid))
        return 0

    rules = fetch_rules_for(file_)
    if not rules:
        file_, rules = fetch_rules_for_derivatives(file_)

    if not rules:
        job.print_error(
            "No rules found for file {} and its derivatives; not transcribing".
            format(file_uuid))
        return 0

    if file_.filegrpuse == "original":
        noun = "original"
    else:
        noun = file_.filegrpuse + " derivative"
    job.print_error("Transcribing {} {}".format(noun, file_.uuid))

    rd = ReplacementDict.frommodel(file_=file_, type_="file")

    for rule in rules:
        script = rule.command.command
        if rule.command.script_type in ("bashScript", "command"):
            # replace() returns a list; exactly one string is expected back.
            script, = rd.replace(script)
            args = []
        else:
            # Call the method: the option list must be passed on, not the
            # bound method object itself.
            args = rd.to_gnu_options()

        exitstatus, stdout, stderr = executeOrRun(rule.command.script_type,
                                                  script,
                                                  arguments=args,
                                                  capture_output=True)
        job.write_output(stdout)
        job.write_error(stderr)
        if exitstatus != 0:
            succeeded = False

        output_path = rd.replace(rule.command.output_location)[0]
        # Store the location with the %SIPDirectory% variable put back in
        # place of the concrete directory.
        relative_path = output_path.replace(rd["%SIPDirectory%"],
                                            "%SIPDirectory%")
        event = insert_transcription_event(exitstatus, file_uuid, rule,
                                           relative_path)

        # Only record an output file if the command actually produced one.
        if os.path.isfile(output_path):
            insert_file_into_database(
                task_uuid,
                file_uuid,
                rd["%SIPUUID%"],
                event,
                rule,
                output_path,
                relative_path,
            )

    return 0 if succeeded else 1
def checkForPreconfiguredXML(self):
    """Look up a preconfigured choice for this link in the unit's processing XML.

    Reads the processing XML file from the unit's directory and, for every
    ``preconfiguredChoice`` whose ``appliesTo`` matches this link, loads the
    stored replacement dict of the chain named in ``goToChain``. If that
    choice carries a ``<delay>`` whose ``unitCtime`` attribute is set to
    something other than "no", completion of the link is scheduled on timers.

    Returns:
        ``waitingOnTimer`` when a delayed completion was scheduled; otherwise
        the stored replacement dict of the last matching choice, or None when
        the file is missing, nothing matches, or the file cannot be processed.
    """
    ret = None
    xmlFilePath = os.path.join(
        self.unit.currentPath.replace(
            "%sharedPath%", django_settings.SHARED_DIRECTORY, 1) + "/",
        django_settings.PROCESSING_XML_FILE)

    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            this_choice_point = choice_unifier.get(self.jobChainLink.pk,
                                                   self.jobChainLink.pk)
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.findall(
                    ".//preconfiguredChoice"):
                if preconfiguredChoice.find(
                        "appliesTo").text == this_choice_point:
                    desiredChoice = preconfiguredChoice.find(
                        "goToChain").text
                    desiredChoice = choice_unifier.get(
                        desiredChoice, desiredChoice)
                    dic = MicroServiceChoiceReplacementDic.objects.get(
                        id=desiredChoice,
                        choiceavailableatlink=this_choice_point)
                    ret = dic.replacementdic
                    try:
                        # <delay unitAtime="yes">30</delay>
                        # NOTE: the attribute actually read below is
                        # "unitCtime", and the reference time is the mtime
                        # of the unit's directory.
                        delayXML = preconfiguredChoice.find("delay")
                        unitAtimeXML = None
                        # Compare against None: an element with no children
                        # is falsy, so a plain truth test would skip every
                        # <delay> element.
                        if delayXML is not None:
                            unitAtimeXML = delayXML.get("unitCtime")
                        if unitAtimeXML is not None and unitAtimeXML.lower(
                        ) != "no":
                            delaySeconds = int(delayXML.text)
                            unitTime = os.path.getmtime(
                                self.unit.currentPath.replace(
                                    "%sharedPath%",
                                    django_settings.SHARED_DIRECTORY, 1))
                            nowTime = time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            LOGGER.info('Time to go: %s', timeToGo)
                            self.jobChainLink.setExitMessage(
                                "Waiting till: " +
                                datetime.datetime.fromtimestamp(
                                    (nowTime + timeToGo)).ctime())
                            rd = ReplacementDict.fromstring(ret)
                            # Merge with an existing passVar; values from the
                            # chosen dict win on key collisions.
                            if self.jobChainLink.passVar is not None:
                                if isinstance(self.jobChainLink.passVar,
                                              ReplacementDict):
                                    new = {}
                                    new.update(
                                        self.jobChainLink.passVar.dic)
                                    new.update(rd.dic)
                                    rd.dic = new
                            # First timer completes the link once the delay
                            # has elapsed.
                            t = threading.Timer(
                                timeToGo,
                                self.jobChainLink.linkProcessingComplete,
                                args=[0, rd],
                                kwargs={})
                            t.daemon = True
                            t.start()

                            # Second timer updates the exit message at the
                            # same moment (not marked as a daemon thread).
                            t2 = threading.Timer(
                                timeToGo,
                                self.jobChainLink.setExitMessage,
                                args=[Job.STATUS_COMPLETED_SUCCESSFULLY],
                                kwargs={})
                            t2.start()
                            return waitingOnTimer

                    except Exception:
                        LOGGER.info('Error parsing XML', exc_info=True)
        except Exception:
            LOGGER.warning(
                'Error parsing xml at %s for pre-configured choice',
                xmlFilePath,
                exc_info=True)
    return ret
def getReplacementDic(self, target):
    """Return the transfer replacement dict for this unit, tagged with its type.

    ``target`` is accepted but not used here.
    """
    replacements = ReplacementDict.frommodel(sip=self.UUID, type_="transfer")
    replacements["%unitType%"] = self.unitType
    return replacements
def test_replacementdict_replace():
    """replace() substitutes a known variable and returns the result in a list."""
    replacements = ReplacementDict({"%PREFIX%": "/usr/local"})
    substituted = replacements.replace("%PREFIX%/bin/")
    assert substituted == ["/usr/local/bin/"]
def test_replacementdict_options():
    """to_gnu_options() turns a %camelCase% key into a --dashed-name=value option."""
    replacements = ReplacementDict({'%relativeLocation%': 'bar'})
    options = replacements.to_gnu_options()
    assert options == ['--relative-location=bar']
def test_replacementdict_options():
    """to_gnu_options() turns a %camelCase% key into a --dashed-name=value option."""
    expected = ["--relative-location=bar"]
    replacements = ReplacementDict({"%relativeLocation%": "bar"})
    assert replacements.to_gnu_options() == expected
def checkForPreconfiguredXML(self):
    """Look up a preconfigured choice for this link in the unit's processing XML.

    Reads the processing XML file from the unit's directory and, for every
    ``preconfiguredChoice`` whose ``appliesTo`` matches this link, takes the
    replacement items of the workflow link whose id equals ``goToChain``. If
    that choice carries a ``<delay>`` whose ``unitCtime`` attribute is set to
    something other than "no", completion of the link is scheduled on timers.

    Returns:
        ``waitingOnTimer`` when a delayed completion was scheduled; otherwise
        the formatted replacement items of the last matching choice. None is
        returned when the file is missing, nothing matches, the workflow link
        or the replacement cannot be found, or the file cannot be processed.
    """
    ret = None
    xmlFilePath = os.path.join(
        self.unit.currentPath.replace(
            "%sharedPath%", django_settings.SHARED_DIRECTORY, 1
        )
        + "/",
        django_settings.PROCESSING_XML_FILE,
    )

    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            this_choice_point = choice_unifier.get(
                self.jobChainLink.pk, self.jobChainLink.pk
            )
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.findall(".//preconfiguredChoice"):
                if preconfiguredChoice.find("appliesTo").text == this_choice_point:
                    desiredChoice = preconfiguredChoice.find("goToChain").text
                    desiredChoice = choice_unifier.get(desiredChoice, desiredChoice)

                    try:
                        link = self.jobChainLink.workflow.get_link(
                            this_choice_point
                        )
                    except KeyError:
                        return

                    for replacement in link.config["replacements"]:
                        if replacement["id"] == desiredChoice:
                            # In our JSON-encoded document, the items in
                            # the replacements are not wrapped, do it here.
                            # Needed by ReplacementDict.
                            ret = self._format_items(replacement["items"])
                            break
                    else:
                        # No replacement carries the desired id.
                        return

                    try:
                        # <delay unitAtime="yes">30</delay>
                        # NOTE: the attribute actually read below is
                        # "unitCtime", and the reference time is the mtime
                        # of the unit's directory.
                        delayXML = preconfiguredChoice.find("delay")
                        unitAtimeXML = None
                        # Compare against None: an element with no children
                        # is falsy, so a plain truth test would skip every
                        # <delay> element.
                        if delayXML is not None:
                            unitAtimeXML = delayXML.get("unitCtime")
                        if (
                            unitAtimeXML is not None
                            and unitAtimeXML.lower() != "no"
                        ):
                            delaySeconds = int(delayXML.text)
                            unitTime = os.path.getmtime(
                                self.unit.currentPath.replace(
                                    "%sharedPath%",
                                    django_settings.SHARED_DIRECTORY,
                                    1,
                                )
                            )
                            nowTime = time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            LOGGER.info("Time to go: %s", timeToGo)
                            self.jobChainLink.setExitMessage(
                                "Waiting till: "
                                + datetime.datetime.fromtimestamp(
                                    (nowTime + timeToGo)
                                ).ctime()
                            )
                            rd = ReplacementDict(ret)
                            # Merge with an existing passVar; values from the
                            # chosen dict win on key collisions.
                            if self.jobChainLink.passVar is not None:
                                if isinstance(
                                    self.jobChainLink.passVar, ReplacementDict
                                ):
                                    new = {}
                                    new.update(self.jobChainLink.passVar.dic)
                                    new.update(rd.dic)
                                    rd.dic = new
                            # First timer completes the link once the delay
                            # has elapsed.
                            t = threading.Timer(
                                timeToGo,
                                self.jobChainLink.linkProcessingComplete,
                                args=[0, rd],
                                kwargs={},
                            )
                            t.daemon = True
                            t.start()

                            # Second timer updates the exit message at the
                            # same moment (not marked as a daemon thread).
                            t2 = threading.Timer(
                                timeToGo,
                                self.jobChainLink.setExitMessage,
                                args=[Job.STATUS_COMPLETED_SUCCESSFULLY],
                                kwargs={},
                            )
                            t2.start()
                            return waitingOnTimer

                    except Exception:
                        LOGGER.info("Error parsing XML", exc_info=True)
        except Exception:
            LOGGER.warning(
                "Error parsing xml at %s for pre-configured choice",
                xmlFilePath,
                exc_info=True,
            )
    return ret
def checkForPreconfiguredXML(self):
    """Look up a preconfigured choice for this link in the unit's processing XML.

    Reads the processing XML file from the unit's directory and, for every
    ``preconfiguredChoice`` whose ``appliesTo`` equals this link's pk, loads
    the stored replacement dict of the chain named in ``goToChain``. If that
    choice carries a ``<delay>`` whose ``unitCtime`` attribute is set to
    something other than "no", completion of the link is scheduled on timers.

    Returns:
        ``waitingOnTimer`` when a delayed completion was scheduled; otherwise
        the stored replacement dict of the last matching choice, or None when
        the file is missing, nothing matches, or the file cannot be processed.
    """
    ret = None
    shared_directory = archivematicaMCP.config.get('MCPServer', "sharedDirectory")
    xmlFilePath = os.path.join(
        self.unit.currentPath.replace("%sharedPath%", shared_directory, 1) + "/",
        archivematicaMCP.config.get('MCPServer', "processingXMLFile")
    )

    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.findall(
                    ".//preconfiguredChoice"):
                if preconfiguredChoice.find(
                        "appliesTo").text == self.jobChainLink.pk:
                    desiredChoice = preconfiguredChoice.find(
                        "goToChain").text
                    dic = MicroServiceChoiceReplacementDic.objects.get(
                        id=desiredChoice,
                        choiceavailableatlink=self.jobChainLink.pk)
                    ret = dic.replacementdic
                    try:
                        # <delay unitAtime="yes">30</delay>
                        # NOTE: the attribute actually read below is
                        # "unitCtime", and the reference time is the mtime
                        # of the unit's directory.
                        delayXML = preconfiguredChoice.find("delay")
                        unitAtimeXML = None
                        # A choice without a <delay> element is normal;
                        # check for it explicitly rather than letting an
                        # AttributeError be logged as an XML error.
                        if delayXML is not None:
                            unitAtimeXML = delayXML.get("unitCtime")
                        if unitAtimeXML is not None and unitAtimeXML.lower(
                        ) != "no":
                            delaySeconds = int(delayXML.text)
                            unitTime = os.path.getmtime(
                                self.unit.currentPath.replace(
                                    "%sharedPath%", shared_directory, 1))
                            nowTime = time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            LOGGER.info('Time to go: %s', timeToGo)
                            self.jobChainLink.setExitMessage(
                                "Waiting till: " +
                                datetime.datetime.fromtimestamp(
                                    (nowTime + timeToGo)).ctime())
                            rd = ReplacementDict.fromstring(ret)
                            # Merge with an existing passVar; values from the
                            # chosen dict win on key collisions.
                            if self.jobChainLink.passVar is not None:
                                if isinstance(self.jobChainLink.passVar,
                                              ReplacementDict):
                                    new = {}
                                    new.update(
                                        self.jobChainLink.passVar.dic)
                                    new.update(rd.dic)
                                    rd.dic = new
                            # First timer completes the link once the delay
                            # has elapsed.
                            t = threading.Timer(
                                timeToGo,
                                self.jobChainLink.linkProcessingComplete,
                                args=[0, rd],
                                kwargs={})
                            t.daemon = True
                            t.start()

                            # Second timer updates the exit message at the
                            # same moment (not marked as a daemon thread).
                            t2 = threading.Timer(
                                timeToGo,
                                self.jobChainLink.setExitMessage,
                                args=["Completed successfully"],
                                kwargs={})
                            t2.start()
                            return waitingOnTimer

                    except Exception:
                        LOGGER.info('Error parsing XML', exc_info=True)
        except Exception:
            LOGGER.warning(
                'Error parsing xml at %s for pre-configured choice',
                xmlFilePath,
                exc_info=True)
    return ret