def main(opts):
    """Find and execute a normalization command on the input file.

    Looks the file up in the database, matches it against an active FPR rule
    for ``opts.purpose`` (falling back to the default rule when the file was
    not identified or has no rule), executes the rule's command, and copies
    the produced thumbnail into the shared ``www/thumbnails`` directory when
    normalizing for the 'thumbnail' purpose.

    Returns SUCCESS, NO_RULE_FOUND or RULE_FAILED.
    """
    # TODO fix for maildir working only on attachments

    # If no explicit return happens earlier, this returns `status`.
    # This allows default rules to define a non-zero exit status.
    status = SUCCESS

    # Find the file and it's FormatVersion (file identification)
    try:
        file_ = File.objects.get(uuid=opts.file_uuid)
    except File.DoesNotExist:
        print('File with uuid', opts.file_uuid, 'does not exist in database.',
              file=sys.stderr)
        return NO_RULE_FOUND
    print('File found:', file_.uuid, file_.currentlocation)

    # Unless normalization file group use is submissionDocumentation, skip the
    # submissionDocumentation directory
    if opts.normalize_file_grp_use != "submissionDocumentation" and file_.currentlocation.startswith(
            '%SIPDirectory%objects/submissionDocumentation'):
        print('File', os.path.basename(opts.file_path),
              'in objects/submissionDocumentation, skipping')
        return SUCCESS

    # Only normalize files where the file's group use and normalize group use match
    if file_.filegrpuse != opts.normalize_file_grp_use:
        print(os.path.basename(opts.file_path), 'is file group usage',
              file_.filegrpuse, 'instead of ', opts.normalize_file_grp_use,
              ' - skipping')
        return SUCCESS

    # For re-ingest: clean up old derivations
    # If the file already has a Derivation with the same purpose, remove it
    # and mark the derived file as deleted
    derivatives = Derivation.objects.filter(
        source_file=file_, derived_file__filegrpuse=opts.purpose)
    for derivative in derivatives:
        print(opts.purpose, 'derivative', derivative.derived_file_id,
              'already exists, marking as deleted')
        File.objects.filter(uuid=derivative.derived_file_id).update(
            filegrpuse='deleted')
        # Don't create events for thumbnail files
        if opts.purpose != 'thumbnail':
            databaseFunctions.insertIntoEvents(
                fileUUID=derivative.derived_file_id,
                eventType='deletion',
            )
    derivatives.delete()

    # If a file has been manually normalized for this purpose, skip it
    manually_normalized_file = check_manual_normalization(opts)
    if manually_normalized_file:
        print(os.path.basename(opts.file_path),
              'was already manually normalized into',
              manually_normalized_file.currentlocation)
        if 'preservation' in opts.purpose:
            # Add derivation link and associated event
            insert_derivation_event(
                original_uuid=opts.file_uuid,
                output_uuid=manually_normalized_file.uuid,
                derivation_uuid=str(uuid.uuid4()),
                event_detail_output="manual normalization",
                outcome_detail_note=None,
            )
        return SUCCESS

    format_id = get_object_or_None(FileFormatVersion, file_uuid=opts.file_uuid)

    # Look up the normalization command in the FPR
    if format_id is None:
        rule = get_default_rule(opts.purpose)
        print(os.path.basename(file_.currentlocation),
              "not identified - falling back to default", opts.purpose, "rule")
    else:
        print('File format:', format_id.format_version)
        try:
            rule = FPRule.active.get(format=format_id.format_version,
                                     purpose=opts.purpose)
        except FPRule.DoesNotExist:
            try:
                rule = get_default_rule(opts.purpose)
                print("No rule for", os.path.basename(file_.currentlocation),
                      "falling back to default", opts.purpose, "rule")
                status = NO_RULE_FOUND
            except FPRule.DoesNotExist:
                print('Not normalizing',
                      os.path.basename(file_.currentlocation),
                      ' - No rule or default rule found to normalize for',
                      opts.purpose, file=sys.stderr)
                return NO_RULE_FOUND
    print('Format Policy Rule:', rule)
    command = rule.command
    print('Format Policy Command', command.description)
    replacement_dict = get_replacement_dict(opts)
    cl = transcoder.CommandLinker(rule, command, replacement_dict, opts,
                                  once_normalized)
    exitstatus = cl.execute()

    # Store thumbnails locally for use during AIP searches
    # TODO is this still needed, with the storage service?
    if 'thumbnail' in opts.purpose:
        thumbnail_filepath = cl.commandObject.output_location
        clientConfigFilePath = '/etc/archivematica/MCPClient/clientConfig.conf'
        config = ConfigParser.SafeConfigParser()
        config.read(clientConfigFilePath)
        try:
            shared_path = config.get('MCPClient', 'sharedDirectoryMounted')
        except ConfigParser.Error:
            # Best-effort fallback when the option/section is absent from the
            # client config. Previously a bare `except:` that also hid
            # unrelated programming errors.
            shared_path = '/var/archivematica/sharedDirectory/'
        thumbnail_storage_dir = os.path.join(
            shared_path,
            'www',
            'thumbnails',
            opts.sip_uuid,
        )
        try:
            os.makedirs(thumbnail_storage_dir)
        except OSError as e:
            # The directory may already exist (another thumbnail for the same
            # SIP); any other makedirs failure is a real error.
            if e.errno == errno.EEXIST and os.path.isdir(thumbnail_storage_dir):
                pass
            else:
                raise
        # Only the extension of the command's output is reused; the stored
        # copy is named after the file UUID.
        _, thumbnail_extension = os.path.splitext(thumbnail_filepath)
        thumbnail_storage_file = os.path.join(
            thumbnail_storage_dir,
            opts.file_uuid + thumbnail_extension,
        )
        shutil.copyfile(thumbnail_filepath, thumbnail_storage_file)

    if exitstatus != 0:
        print('Command', command.description, 'failed!', file=sys.stderr)
        return RULE_FAILED
    else:
        print('Successfully normalized ',
              os.path.basename(opts.file_path), 'for', opts.purpose)
        return status
def identifyCommands(fileName):
    """Identify file type(s) and collect matching transcoder commands.

    Queries CommandRelationships matched first by PRONOM IDs extracted from
    the file's PREMIS metadata, then by file extension, restricted to the
    requested command classification. When nothing matches, runs the
    classification-specific fallback (copy-to-access / default-thumbnail)
    and exits the process.

    NOTE(review): the ``fileName`` parameter is never used; the function
    reads module-level ``opts`` and ``transcoder.fileExtension`` instead.
    """
    ret = []
    # PREMIS metadata written earlier in the workflow for this file.
    premisFile = opts.logsDirectory + "fileMeta/" + opts.fileUUID + ".xml"
    try:
        for pronomID in getPronomsFromPremis(premisFile):
            # NOTE(review): pronomID and opts.commandClassifications are
            # concatenated directly into the SQL — injection risk if these
            # values are ever attacker-controlled; confirm they are trusted.
            sql = """SELECT CR.pk, CR.command, CR.GroupMember FROM CommandRelationships AS CR JOIN FileIDs ON CR.fileID=FileIDs.pk JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk JOIN FileIDsByPronom AS FIBP ON FileIDs.pk = FIBP.FileIDs WHERE FIBP.FileID = '""" + pronomID.__str__() + """' AND CommandClassifications.classification = '""" + opts.commandClassifications + """';"""
            c, sqlLock = databaseInterface.querySQL(sql)
            row = c.fetchone()
            while row != None:
                ret.append(row)
                row = c.fetchone()
            # querySQL hands back a lock that must be released after fetching.
            sqlLock.release()
    except:
        # Best-effort: a missing/bad PREMIS file clears any partial results
        # and falls through to extension matching below.
        print >> sys.stderr, "Failed to retrieve pronomIDs."
        ret = []
    if transcoder.fileExtension:
        # Second pass: match commands by file extension (same injection
        # caveat as above applies to the concatenated values).
        sql = """SELECT CR.pk, CR.command, CR.GroupMember FROM CommandRelationships AS CR JOIN FileIDs ON CR.fileID=FileIDs.pk JOIN CommandClassifications ON CR.commandClassification = CommandClassifications.pk JOIN FileIDsByExtension AS FIBE ON FileIDs.pk = FIBE.FileIDs WHERE FIBE.Extension = '""" + transcoder.fileExtension.__str__() + """' AND CommandClassifications.classification = '""" + opts.commandClassifications + """';"""
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            ret.append(row)
            row = c.fetchone()
        sqlLock.release()
    # No command matched: run the per-classification fallback behaviour.
    if not len(ret):
        if opts.commandClassifications == "preservation":
            if inPreservationFormat():
                print "Already in preservation format."
            else:
                print >> sys.stderr, "Unable to verify archival readiness."
            #Issue 528: related to exit code
            exit(0)
        elif opts.commandClassifications == "access":
            # Fall back to plain copy into the access directory.
            sql = """SELECT CR.pk, CR.command, CR.GroupMember FROM CommandRelationships AS CR JOIN Commands AS C ON CR.command = C.pk WHERE C.description = 'Copying file to access directory.';"""
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                cl = transcoder.CommandLinker(row)
                copyExitCode = cl.execute()
                if copyExitCode:
                    exit(copyExitCode)
            if inAccessFormat():
                print "Already in access format."
                exit(0)
            else:
                print >> sys.stderr, "Unable to verify access readiness."
                #Issue 528: related to exit code
                exit(0)
        elif opts.commandClassifications == "thumbnail":
            #use default thumbnail
            print "Using default thumbnail"
            sql = """SELECT CR.pk, CR.command, CR.GroupMember FROM CommandRelationships AS CR JOIN Commands AS C ON CR.command = C.pk WHERE C.description = 'Using default thumbnail.';"""
            rows = databaseInterface.queryAllSQL(sql)
            for row in rows:
                cl = transcoder.CommandLinker(row)
                copyExitCode = cl.execute()
                # Exits with the default-thumbnail command's status.
                exit(copyExitCode)
    return ret
def main(opts):
    """Find and execute a normalization command on the input file.

    Looks the file up in the database, matches it against an active FPR rule
    for ``opts.purpose`` (falling back to the default rule when the file was
    not identified or has no rule), executes the rule's command, retries
    failed access/thumbnail normalizations with the default rule, and stores
    produced thumbnails in the shared ``www/thumbnails`` directory.

    Returns SUCCESS, NO_RULE_FOUND or RULE_FAILED.
    """
    # TODO fix for maildir working only on attachments

    setup_dicts(mcpclient_settings)

    # Find the file and it's FormatVersion (file identification)
    try:
        file_ = File.objects.get(uuid=opts.file_uuid)
    except File.DoesNotExist:
        print('File with uuid', opts.file_uuid, 'does not exist in database.',
              file=sys.stderr)
        return NO_RULE_FOUND
    print('File found:', file_.uuid, file_.currentlocation)

    # Unless normalization file group use is submissionDocumentation, skip the
    # submissionDocumentation directory
    if opts.normalize_file_grp_use != "submissionDocumentation" and file_.currentlocation.startswith(
            '%SIPDirectory%objects/submissionDocumentation'):
        print('File', os.path.basename(opts.file_path),
              'in objects/submissionDocumentation, skipping')
        return SUCCESS

    # Only normalize files where the file's group use and normalize group use match
    if file_.filegrpuse != opts.normalize_file_grp_use:
        print(os.path.basename(opts.file_path), 'is file group usage',
              file_.filegrpuse, 'instead of ', opts.normalize_file_grp_use,
              ' - skipping')
        return SUCCESS

    # For re-ingest: clean up old derivations
    # If the file already has a Derivation with the same purpose, remove it
    # and mark the derived file as deleted
    derivatives = Derivation.objects.filter(
        source_file=file_, derived_file__filegrpuse=opts.purpose)
    for derivative in derivatives:
        print(opts.purpose, 'derivative', derivative.derived_file_id,
              'already exists, marking as deleted')
        File.objects.filter(uuid=derivative.derived_file_id).update(
            filegrpuse='deleted')
        # Don't create events for thumbnail files
        if opts.purpose != 'thumbnail':
            databaseFunctions.insertIntoEvents(
                fileUUID=derivative.derived_file_id,
                eventType='deletion',
            )
    derivatives.delete()

    # If a file has been manually normalized for this purpose, skip it
    manually_normalized_file = check_manual_normalization(opts)
    if manually_normalized_file:
        print(os.path.basename(opts.file_path),
              'was already manually normalized into',
              manually_normalized_file.currentlocation)
        if 'preservation' in opts.purpose:
            # Add derivation link and associated event
            insert_derivation_event(
                original_uuid=opts.file_uuid,
                output_uuid=manually_normalized_file.uuid,
                derivation_uuid=str(uuid.uuid4()),
                event_detail_output="manual normalization",
                outcome_detail_note=None,
            )
        return SUCCESS

    do_fallback = False
    format_id = get_object_or_None(FileFormatVersion, file_uuid=opts.file_uuid)

    # Look up the normalization command in the FPR
    if format_id:
        print('File format:', format_id.format_version)
        try:
            rule = FPRule.active.get(format=format_id.format_version,
                                     purpose=opts.purpose)
        except FPRule.DoesNotExist:
            do_fallback = True

    # Try with default rule if no format_id or rule was found
    if format_id is None or do_fallback:
        try:
            rule = get_default_rule(opts.purpose)
            print(os.path.basename(file_.currentlocation),
                  "not identified or without rule",
                  "- Falling back to default", opts.purpose, "rule")
        except FPRule.DoesNotExist:
            # Error messages go to stderr, consistent with the failure
            # print at the bottom of this function (was stdout).
            print('Not normalizing', os.path.basename(file_.currentlocation),
                  ' - No rule or default rule found to normalize for',
                  opts.purpose, file=sys.stderr)
            return NO_RULE_FOUND

    print('Format Policy Rule:', rule)
    command = rule.command
    print('Format Policy Command', command.description)

    replacement_dict = get_replacement_dict(opts)
    cl = transcoder.CommandLinker(rule, command, replacement_dict, opts,
                                  once_normalized)
    exitstatus = cl.execute()

    # If the access/thumbnail normalization command has errored AND a
    # derivative was NOT created, then we run the default access/thumbnail
    # rule. Note that we DO need to check if the derivative file exists. Even
    # when a verification command exists for the normalization command, the
    # transcoder.py::Command.execute method will only run the verification
    # command if the normalization command returns a 0 exit code.
    # Errored thumbnail normalization also needs to result in default thumbnail
    # normalization; if not, then a transfer with a single file that failed
    # thumbnail normalization will result in a failed SIP at "Prepare DIP: Copy
    # thumbnails to DIP directory"
    if (exitstatus != 0 and
            opts.purpose in ('access', 'thumbnail') and
            cl.commandObject.output_location and
            (not os.path.isfile(cl.commandObject.output_location))):
        # Fall back to default rule
        try:
            fallback_rule = get_default_rule(opts.purpose)
            print(opts.purpose,
                  'normalization failed, falling back to default',
                  opts.purpose, 'rule')
        except FPRule.DoesNotExist:
            print('Not retrying normalizing for',
                  os.path.basename(file_.currentlocation),
                  ' - No default rule found to normalize for', opts.purpose)
            fallback_rule = None
        # Don't re-run the same command
        if fallback_rule and fallback_rule.command != command:
            print('Fallback Format Policy Rule:', fallback_rule)
            command = fallback_rule.command
            print('Fallback Format Policy Command', command.description)

            # Use existing replacement dict
            cl = transcoder.CommandLinker(fallback_rule, command,
                                          replacement_dict, opts,
                                          once_normalized)
            exitstatus = cl.execute()

    # Store thumbnails locally for use during AIP searches
    # TODO is this still needed, with the storage service?
    if 'thumbnail' in opts.purpose:
        thumbnail_filepath = cl.commandObject.output_location
        thumbnail_storage_dir = os.path.join(
            mcpclient_settings.SHARED_DIRECTORY,
            'www',
            'thumbnails',
            opts.sip_uuid,
        )
        try:
            os.makedirs(thumbnail_storage_dir)
        except OSError as e:
            # The directory may already exist (another thumbnail for the same
            # SIP); any other makedirs failure is a real error.
            if e.errno == errno.EEXIST and os.path.isdir(thumbnail_storage_dir):
                pass
            else:
                raise
        # Only the extension of the command's output is reused; the stored
        # copy is named after the file UUID.
        _, thumbnail_extension = os.path.splitext(thumbnail_filepath)
        thumbnail_storage_file = os.path.join(
            thumbnail_storage_dir,
            opts.file_uuid + thumbnail_extension,
        )
        shutil.copyfile(thumbnail_filepath, thumbnail_storage_file)

    if exitstatus != 0:
        print('Command', command.description, 'failed!', file=sys.stderr)
        return RULE_FAILED
    else:
        print('Successfully normalized ', os.path.basename(opts.file_path),
              'for', opts.purpose)
        return SUCCESS
def executeCommandReleationship(gearman_worker, gearman_job):
    """Gearman task handler: run the transcoder command relationship
    described in the pickled job payload.

    Unpickles the job arguments, fills in derived paths and replacement
    values, logs the task assignment, executes the linked command via
    transcoder.CommandLinker, and returns a pickled dict with keys
    "exitCode", "stdOut" and "stdError". On OSError, returns exit code 1
    with a "Config Error!" message instead.

    NOTE(review): name is misspelled ("Releationship") but is the public
    task entry point — callers register it by this name, so it stays.
    """
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        # NOTE(review): job data comes over the internal gearman queue;
        # cPickle.loads on untrusted data would be unsafe — assumed trusted.
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseInterface.getUTCDate()
        opts = data["arguments"]  #.encode("utf-8")
        #if isinstance(arguments, unicode):
        #    arguments = arguments.encode("utf-8")
        #if isinstance(arguments, str):
        #    arguments = unicode(arguments)
        sInput = ""
        clientID = gearman_worker.worker_client_id
        # Derived option values used by the replacement dictionary below.
        opts["date"] = utcDate
        opts["accessDirectory"] = os.path.join(opts['sipPath'], "DIP/objects") + "/"
        opts["thumbnailDirectory"] = os.path.join(opts['sipPath'], "thumbnails") + "/"
        print opts
        # Apply the client-wide replacement dictionary to every option value
        # (mutates opts in place; assumes all values are strings).
        for key, value in archivematicaClient.replacementDic.iteritems():
            for key2 in opts:
                opts[key2] = opts[key2].replace(key, value)
        replacementDic = getReplacementDic(opts)
        # Text prepended to the command's captured stdout/stderr so task
        # output identifies the file and classification being processed.
        opts[
            "prependStdOut"] = """Operating on file: {%s}%s \r\nUsing %s command classifications""" % (
            opts["fileUUID"], replacementDic["%fileName%"],
            opts["commandClassification"])
        opts["prependStdError"] = "\r\nSTDError:"
        # print clientID, execute, data
        archivematicaClient.logTaskAssignedSQL(gearman_job.unique.__str__(),
                                               clientID, utcDate)
        cl = transcoder.CommandLinker(opts["CommandRelationship"],
                                      replacementDic, opts, onceNormalized)
        cl.execute()
        co = cl.commandObject
        exitCode = co.exitCode
        stdOut = "%s \r\n%s" % (opts["prependStdOut"], co.stdOut)
        # Suppress the stderr prefix entirely when the command wrote nothing
        # (or only whitespace) to stderr.
        if not co.stdError or co.stdError.isspace():
            stdError = ""
        else:
            stdError = "%s \r\n%s" % (opts["prependStdError"], co.stdError)
        #TODO add date to ops
        #Replace replacement strings
        #archivematicaClient.printOutputLock.acquire()
        #print >>sys.stderr, "<processingCommand>{" + gearman_job.unique + "}" + command.__str__() + "</processingCommand>"
        #archivematicaClient.printOutputLock.release()
        #exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False)
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": stdOut,
            "stdError": stdError
        })
    #catch OS errors
    except OSError, ose:
        # Serialise console output under the shared lock so tracebacks from
        # concurrent workers don't interleave.
        archivematicaClient.printOutputLock.acquire()
        traceback.print_exc(file=sys.stdout)
        print >> sys.stderr, "Execution failed:", ose
        archivematicaClient.printOutputLock.release()
        output = ["Config Error!", ose.__str__()]
        exitCode = 1
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": output[0],
            "stdError": output[1]
        })