def getReplacementDic(self, target):
    """
    Build the replacement-string mapping for this SIP unit.

    The mapping is seeded by ``ReplacementDict.frommodel`` for the SIP
    identified by ``self.UUID`` and then extended with values held on the
    unit object itself. ``target`` is accepted for interface compatibility
    but is not used here.
    """
    replacements = ReplacementDict.frommodel(sip=self.UUID, type_="sip")
    # Unit-level values that are read from this object rather than from
    # the model lookup above.
    unit_values = (
        ("%AIPFilename%", self.aipFilename),
        ("%unitType%", self.unitType),
        ("%SIPType%", self.sipType),
    )
    for placeholder, value in unit_values:
        replacements[placeholder] = value
    return replacements
def getReplacementDic(self, target):
    """
    Build the replacement-string mapping for this transfer unit.

    The mapping comes from ``ReplacementDict.frommodel`` (type
    ``'transfer'``, keyed on ``self.UUID``) with ``%unitType%`` added from
    this object. ``target`` is accepted but not used here.
    """
    replacements = ReplacementDict.frommodel(sip=self.UUID, type_='transfer')
    replacements["%unitType%"] = self.unitType
    return replacements
def test_replacementdict_model_constructor_file_only():
    """A dict built from a file alone exposes the file-level variables."""
    replacements = ReplacementDict.frommodel(type_='file', file_=FILE)

    # (placeholder, expected value) pairs, checked in order.
    expected_pairs = [
        ('%fileUUID%', FILE.uuid),
        ('%originalLocation%', FILE.originallocation),
        ('%currentLocation%', FILE.currentlocation),
        # With no SIP/transfer supplied, the relative location is expected
        # to equal the file's current location.
        ('%relativeLocation%', FILE.currentlocation),
        ('%fileGrpUse%', FILE.filegrpuse),
    ]
    for placeholder, expected in expected_pairs:
        assert replacements[placeholder] == expected
def main(task_uuid, file_uuid):
    """
    Run every applicable transcription rule against one file.

    Only files whose ``filegrpuse`` is ``"original"`` are processed. If no
    rules are found for the original, its derivatives are consulted as a
    fallback via ``fetch_rules_for_derivatives``.

    :param task_uuid: identifier of the calling task (not used in this body).
    :param file_uuid: UUID of the file to transcribe; also used when
        recording events and output files, even if a derivative ends up
        being the file that is actually transcribed.
    :returns: 0 if nothing had to be done or every command exited with
        status 0; 1 if at least one command exited non-zero.
    """
    setup_dicts(mcpclient_settings)

    succeeded = True

    file_ = File.objects.get(uuid=file_uuid)

    # Normally we don't transcribe derivatives (access copies, preservation
    # copies); however, some useful transcription tools can't handle some
    # formats that are common as the primary copies. For example, tesseract
    # can't handle JPEG2000. If there are no rules for the primary format
    # passed in, try to look at each derivative until a transcribable
    # derivative is found.
    #
    # Skip derivatives to avoid double-scanning them; only look at them as a
    # fallback.
    if file_.filegrpuse != "original":
        print('{} is not an original; not transcribing'.format(file_uuid), file=sys.stderr)
        return 0

    rules = fetch_rules_for(file_)
    if not rules:
        file_, rules = fetch_rules_for_derivatives(file_)

    if not rules:
        print('No rules found for file {} and its derivatives; not transcribing'.format(file_uuid), file=sys.stderr)
        return 0

    if file_.filegrpuse == "original":
        noun = "original"
    else:
        noun = file_.filegrpuse + " derivative"
    print('Transcribing {} {}'.format(noun, file_.uuid), file=sys.stderr)

    rd = ReplacementDict.frommodel(file_=file_, type_='file')

    for rule in rules:
        script = rule.command.command
        if rule.command.script_type in ('bashScript', 'command'):
            # Shell-style commands get their placeholders substituted inline
            # and take no extra arguments.
            script, = rd.replace(script)
            args = []
        else:
            # Other script types receive the replacement values as
            # arguments. The method must be *called*: passing the bound
            # method object itself would hand executeOrRun a callable
            # instead of the argument values.
            args = rd.to_gnu_options()

        exitstatus, stdout, stderr = executeOrRun(rule.command.script_type, script, arguments=args)
        if exitstatus != 0:
            succeeded = False

        output_path = rd.replace(rule.command.output_location)[0]
        # Store the location with the %SIPDirectory% placeholder in place of
        # the concrete directory value.
        relative_path = output_path.replace(rd['%SIPDirectory%'], '%SIPDirectory%')
        event = insert_transcription_event(exitstatus, file_uuid, rule, relative_path)

        # Only register an output file if the command actually produced one.
        if os.path.isfile(output_path):
            insert_file_into_database(file_uuid, rd['%SIPUUID%'], event, rule, output_path, relative_path)

    return 0 if succeeded else 1
def getReplacementDic(self, target=None):
    """
    Build the replacement-string mapping for this file unit.

    Resolution order:

    1. If a ``target`` is given and the file has an owning unit, delegate to
       the owning unit, passing the owning unit's current path.
    2. Otherwise, if no UUID has been assigned yet (``self.UUID`` is the
       string ``"None"``), construct the dict manually from the attributes
       held on this object, since the model-based constructor can't be used.
    3. Otherwise, use ``ReplacementDict.frommodel`` keyed on ``self.UUID``.
    """
    if target is not None and self.owningUnit:
        owner = self.owningUnit
        return owner.getReplacementDic(owner.currentPath)

    if self.UUID == "None":
        # Old-style manual construction; nothing to look up by UUID yet.
        manual_values = {
            "%relativeLocation%": self.currentPath,
            "%fileUUID%": self.UUID,
            "%fileGrpUse%": self.fileGrpUse
        }
        return ReplacementDict(manual_values)

    return ReplacementDict.frommodel(type_='file', file_=self.UUID)
def get_replacement_dict(job, opts):
    """
    Generate values for all known %var% replacement variables.

    The output directory, prefix and postfix depend on which of
    "preservation", "access" or "thumbnail" appears in ``opts.purpose``
    (checked in that order). Any other purpose is reported through
    ``job.print_error`` and ``None`` is returned.
    """
    # Split the input path into its directory and extension-less file name.
    parent_dir, base_name = os.path.split(opts.file_path)
    parent_dir = parent_dir + os.path.sep  # All paths should have trailing /
    stem, _extension = os.path.splitext(base_name)

    purpose = opts.purpose
    name_prefix = ""
    name_postfix = ""
    if "preservation" in purpose:
        name_postfix = "-" + opts.task_uuid
        target_dir = parent_dir
    elif "access" in purpose:
        name_prefix = opts.file_uuid + "-"
        target_dir = os.path.join(opts.sip_path, "DIP", "objects") + os.path.sep
    elif "thumbnail" in purpose:
        target_dir = os.path.join(opts.sip_path, "thumbnails") + os.path.sep
        name_postfix = opts.file_uuid
    else:
        job.print_error("Unsupported command purpose", purpose)
        return None

    # Start from the standard set of unit variables so that, e.g.,
    # %fileUUID% is available, then layer the output-specific values on top.
    result = ReplacementDict.frommodel(type_="file", file_=opts.file_uuid)

    target_name = name_prefix + stem + name_postfix
    output_values = {
        "%outputDirectory%": target_dir,
        "%prefix%": name_prefix,
        "%postfix%": name_postfix,
        # Neither of the following two values includes the extension.
        "%outputFileName%": target_name,
        "%outputFilePath%": os.path.join(target_dir, target_name),
    }
    result.update(output_values)

    return result
def test_replacementdict_model_constructor_sip():
    """A dict built for a SIP plus a file exposes both sets of variables."""
    replacements = ReplacementDict.frommodel(type_='sip', sip=SIP, file_=FILE)
    sip_path = SIP.currentpath

    # SIP-specific variables
    sip_expectations = [
        ('%SIPUUID%', SIP.uuid),
        ('%relativeLocation%', sip_path),
        ('%currentPath%', sip_path),
        ('%SIPDirectory%', sip_path),
        ('%SIPDirectoryBasename%', os.path.basename(sip_path)),
        ('%SIPLogsDirectory%', os.path.join(sip_path, 'logs/')),
        ('%SIPObjectsDirectory%', os.path.join(sip_path, 'objects/')),
        ('%relativeLocation%', sip_path),
    ]
    for placeholder, expected in sip_expectations:
        assert replacements[placeholder] == expected
    # The transfer-only variable must be absent for a SIP.
    assert '%transferDirectory%' not in replacements

    # File-specific variables
    file_expectations = [
        ('%fileUUID%', FILE.uuid),
        ('%originalLocation%', FILE.originallocation),
        ('%currentLocation%', FILE.currentlocation),
        ('%fileGrpUse%', FILE.filegrpuse),
    ]
    for placeholder, expected in file_expectations:
        assert replacements[placeholder] == expected
def test_replacementdict_model_constructor_transfer():
    """A dict built for a transfer plus a file exposes both sets of variables."""
    replacements = ReplacementDict.frommodel(type_='transfer', sip=TRANSFER, file_=FILE)
    transfer_path = TRANSFER.currentlocation

    # Transfer-specific variables
    transfer_expectations = [
        ('%SIPUUID%', TRANSFER.uuid),
        ('%relativeLocation%', transfer_path),
        ('%currentPath%', transfer_path),
        ('%SIPDirectory%', transfer_path),
        ('%transferDirectory%', transfer_path),
        ('%SIPDirectoryBasename%', os.path.basename(transfer_path)),
        ('%SIPLogsDirectory%', os.path.join(transfer_path, 'logs/')),
        ('%SIPObjectsDirectory%', os.path.join(transfer_path, 'objects/')),
        # no, not actually relative
        ('%relativeLocation%', transfer_path),
    ]
    for placeholder, expected in transfer_expectations:
        assert replacements[placeholder] == expected

    # File-specific variables
    file_expectations = [
        ('%fileUUID%', FILE.uuid),
        ('%originalLocation%', FILE.originallocation),
        ('%currentLocation%', FILE.currentlocation),
        ('%fileGrpUse%', FILE.filegrpuse),
    ]
    for placeholder, expected in file_expectations:
        assert replacements[placeholder] == expected
def test_replacementdict_model_constructor_sip():
    """Check SIP-level and file-level variables of a SIP-type dict."""
    replacements = ReplacementDict.frommodel(type_="sip", file_=FILE, sip=SIP)
    current_path = SIP.currentpath

    def check(placeholder, expected):
        # Single comparison helper so each expectation reads as one line.
        assert replacements[placeholder] == expected

    # SIP-specific variables
    check("%SIPUUID%", SIP.uuid)
    check("%relativeLocation%", current_path)
    check("%currentPath%", current_path)
    check("%SIPDirectory%", current_path)
    # The transfer-only variable must be absent for a SIP.
    assert "%transferDirectory%" not in replacements
    check("%SIPDirectoryBasename%", os.path.basename(current_path))
    check("%SIPLogsDirectory%", os.path.join(current_path, "logs/"))
    check("%SIPObjectsDirectory%", os.path.join(current_path, "objects/"))
    check("%relativeLocation%", current_path)

    # File-specific variables
    check("%fileUUID%", FILE.uuid)
    check("%originalLocation%", FILE.originallocation)
    check("%currentLocation%", FILE.currentlocation)
    check("%fileGrpUse%", FILE.filegrpuse)
def getReplacementDic(self, target):
    """
    Build the replacement-string mapping for this DIP unit.

    The mapping is seeded by ``ReplacementDict.frommodel`` for the SIP
    identified by ``self.UUID``. The location entries are then overridden,
    because DIP is a special case whose paths are not entirely based on
    data from the database.

    :param target: path used to compute ``%relativeLocation%``; any
        occurrence of the shared directory in it is rewritten to the
        ``%sharedPath%`` placeholder.
    :returns: the augmented replacement dict.
    """
    ret = ReplacementDict.frommodel(type_="sip", sip=self.UUID)

    shared_directory = django_settings.SHARED_DIRECTORY
    sip_directory = self.currentPath.replace(shared_directory, "%sharedPath%")
    relative_directory_location = target.replace(
        shared_directory, "%sharedPath%")

    # The trailing "" makes os.path.join end the path with a separator.
    ret["%SIPLogsDirectory%"] = os.path.join(sip_directory, "logs", "")
    ret["%SIPObjectsDirectory%"] = os.path.join(sip_directory, "objects", "")
    ret["%SIPDirectory%"] = sip_directory
    # The placeholder needs its closing "%": without it the intended
    # override is never applied and a stray, malformed key is added to the
    # dict instead.
    ret["%SIPDirectoryBasename%"] = os.path.basename(
        os.path.abspath(sip_directory))
    ret["%relativeLocation%"] = target.replace(
        self.currentPath, relative_directory_location, 1)
    ret["%unitType%"] = "DIP"
    return ret
def getReplacementDic(self, target):
    """
    Build the replacement-string mapping for this DIP unit.

    The mapping is seeded by ``ReplacementDict.frommodel`` for the SIP
    identified by ``self.UUID``. The location entries are then overridden,
    because DIP is a special case whose paths are not entirely based on
    data from the database.

    :param target: path used to compute ``%relativeLocation%``; any
        occurrence of the configured shared directory in it is rewritten to
        the ``%sharedPath%`` placeholder.
    :returns: the augmented replacement dict.
    """
    ret = ReplacementDict.frommodel(type_='sip', sip=self.UUID)

    # Look the shared directory up once; both substitutions below use it.
    shared_directory = archivematicaMCP.config.get(
        'MCPServer', "sharedDirectory")
    sip_directory = self.currentPath.replace(shared_directory, "%sharedPath%")
    relative_directory_location = target.replace(
        shared_directory, "%sharedPath%")

    # The trailing "" makes os.path.join end the path with a separator.
    ret["%SIPLogsDirectory%"] = os.path.join(sip_directory, "logs", "")
    ret["%SIPObjectsDirectory%"] = os.path.join(sip_directory, "objects", "")
    ret["%SIPDirectory%"] = sip_directory
    # The placeholder needs its closing "%": without it the intended
    # override is never applied and a stray, malformed key is added to the
    # dict instead.
    ret["%SIPDirectoryBasename%"] = os.path.basename(
        os.path.abspath(sip_directory))
    ret["%relativeLocation%"] = target.replace(
        self.currentPath, relative_directory_location, 1)
    ret["%unitType%"] = "DIP"
    return ret
def main(job, file_path, file_uuid, sip_uuid):
    """
    Run the characterization commands that apply to one file.

    Commands come from the active "characterization" rules for the file's
    format, falling back to the "default_characterization" rules when none
    are found. Output of commands whose declared output format is fmt/101
    (XML) is validated and stored; other output is not collected.

    ``file_path`` is accepted but not used in this body.

    Returns 0 on success (or if the file was already characterized) and 255
    if any command failed or produced invalid XML.
    """
    setup_dicts(mcpclient_settings)

    # Check to see whether the file has already been characterized; don't
    # try to characterize it a second time if so.
    already_characterized = FPCommandOutput.objects.filter(
        file_id=file_uuid).count() > 0
    if already_characterized:
        return 0

    try:
        format_version = FormatVersion.active.get(
            fileformatversion__file_uuid=file_uuid)
    except FormatVersion.DoesNotExist:
        format_version = rules = None

    if format_version:
        rules = FPRule.active.filter(format=format_version.uuid,
                                     purpose="characterization")

    # Characterization always occurs - if nothing is specified, get one or
    # more defaults specified in the FPR.
    if not rules:
        rules = FPRule.active.filter(purpose="default_characterization")

    any_failed = False
    for rule in rules:
        command = rule.command

        if command.script_type in ("bashScript", "command"):
            # Shell-style commands get placeholders substituted inline.
            arguments = []
            to_run = replace_string_values(command.command,
                                           file_=file_uuid,
                                           sip=sip_uuid,
                                           type_="file")
        else:
            # Other script types receive the values as arguments.
            replacements = ReplacementDict.frommodel(file_=file_uuid,
                                                     sip=sip_uuid,
                                                     type_="file")
            arguments = replacements.to_gnu_options()
            to_run = command.command

        exitstatus, stdout, stderr = executeOrRun(
            command.script_type,
            to_run,
            arguments=arguments,
            capture_output=True,
        )

        job.write_output(stdout)
        job.write_error(stderr)

        if exitstatus != 0:
            job.write_error(
                "Command {} failed with exit status {}; stderr:".format(
                    command.description, exitstatus))
            any_failed = True
            continue

        # fmt/101 is XML - we want to collect and package any XML output,
        # while allowing other commands to execute without actually
        # collecting their output in the event that they are writing their
        # output to disk. FPCommandOutput can have multiple rows for a given
        # file, distinguished by the rule that produced it.
        output_format = command.output_format
        if not (output_format and output_format.pronom_id == "fmt/101"):
            job.write_error(
                'Tool output for command "{}" ({}) is not XML; not saving to database'
                .format(command.description, command.uuid))
            continue

        try:
            # Parse only to validate; the raw stdout is what gets stored.
            etree.fromstring(stdout)
            insertIntoFPCommandOutput(file_uuid, stdout, rule.uuid)
            job.write_output(
                'Saved XML output for command "{}" ({})'.format(
                    command.description, command.uuid))
        except etree.XMLSyntaxError:
            any_failed = True
            job.write_error(
                'XML output for command "{}" ({}) was not valid XML; not saving to database'
                .format(command.description, command.uuid))

    return 255 if any_failed else 0