def verifyMetsFileSecChecksums(metsFile, date, taskUUID, transferDirectory, transferUUID, relativeDirectory="./"):
    # Note: despite its name, this function moves the LICENSE files listed in the
    # DSpace METS fileSec into metadata/submissionDocumentation/DspaceLicenses
    # and records a movement event for each one.
    print(metsFile)
    DspaceLicenses = "metadata/submissionDocumentation/DspaceLicenses"
    try:
        path = os.path.join(transferDirectory, DspaceLicenses)
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        print("error creating DspaceLicenses directory.")
    exitCode = 0

    tree = etree.parse(metsFile)
    root = tree.getroot()
    for item in root.findall("{http://www.loc.gov/METS/}fileSec/{http://www.loc.gov/METS/}fileGrp"):
        USE = item.get("USE")
        if USE == "LICENSE":
            for item2 in item:
                if item2.tag == "{http://www.loc.gov/METS/}file":
                    for item3 in item2:
                        if item3.tag == "{http://www.loc.gov/METS/}FLocat":
                            fileLocation = item3.get("{http://www.w3.org/1999/xlink}href")
                            fileFullPath = os.path.join(relativeDirectory, fileLocation)
                            dest = os.path.join(transferDirectory, DspaceLicenses, os.path.basename(fileLocation))
                            rename(fileFullPath, dest)

                            src = fileFullPath.replace(transferDirectory, "%transferDirectory%")
                            dst = dest.replace(transferDirectory, "%transferDirectory%")
                            eventDetail = ""
                            eventOutcomeDetailNote = 'moved from="' + src + '"; moved to="' + dst + '"'
                            updateFileLocation(src, dst, "movement", date, eventDetail, transferUUID=transferUUID, eventOutcomeDetailNote=eventOutcomeDetailNote)
    return exitCode
def verifyMetsFileSecChecksums(metsFile, date, taskUUID, transferDirectory, transferUUID, relativeDirectory="./"):
    # Note: despite its name, this function moves the DSpace mets.xml into
    # metadata/submissionDocumentation/DSpaceMets and records a movement event.
    print(metsFile)
    DSpaceMets = "metadata/submissionDocumentation/DSpaceMets"
    try:
        path = os.path.join(transferDirectory, DSpaceMets)
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        print("error creating DSpaceMets directory.")
    exitCode = 0

    metsDirectory = os.path.basename(os.path.dirname(metsFile))
    if metsDirectory == "DSpace_export":
        outputDirectory = path
    else:
        outputDirectory = os.path.join(path, metsDirectory)
        if not os.path.isdir(outputDirectory):
            os.mkdir(outputDirectory)

    dest = os.path.join(outputDirectory, "mets.xml")
    rename(metsFile, dest)

    src = metsFile.replace(transferDirectory, "%transferDirectory%")
    dst = dest.replace(transferDirectory, "%transferDirectory%")
    eventDetail = ""
    eventOutcomeDetailNote = 'moved from="' + src + '"; moved to="' + dst + '"'
    updateFileLocation(src, dst, "movement", date, eventDetail, transferUUID=transferUUID, eventOutcomeDetailNote=eventOutcomeDetailNote)
    return exitCode
def moveSIP(src, dst, transferUUID, sharedDirectoryPath):
    # Normalize the source path and compute the destination as it will be
    # recorded in the database (with the shared path replaced by %sharedPath%),
    # then update the database before performing the actual move.
    if src.endswith("/"):
        src = src[:-1]
    dest = dst.replace(sharedDirectoryPath, "%sharedPath%", 1)
    if dest.endswith("/"):
        dest = os.path.join(dest, os.path.basename(src))
    if dest.endswith("/."):
        dest = os.path.join(dest[:-1], os.path.basename(src))
    if os.path.isdir(src):
        dest += "/"
    updateDB(dest, transferUUID)
    rename(src, dst)
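# The updateDB() helper called by moveSIP() above is not included in this
# excerpt. Below is a minimal sketch of the transfer variant, assuming a Django
# "Transfer" model with a "currentlocation" field (an assumption; the real
# helper, model import path, and field names may differ, and the SIP variant
# used further below would update the SIP record instead).

from main.models import Transfer  # import path is an assumption in this sketch


def updateDB(dst, transferUUID):
    # Persist the transfer's new location (already rewritten to use the
    # %sharedPath% placeholder) so later jobs can locate the moved transfer.
    Transfer.objects.filter(uuid=transferUUID).update(currentlocation=dst)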
def something(SIPDirectory, accessDirectory, objectsDirectory, DIPDirectory, SIPUUID, date, copy=False):
    # exitCode = 435
    exitCode = 179
    print(SIPDirectory)
    # For every file and directory, try to find the matching file and directory
    # in the objects directory.
    for (path, dirs, files) in os.walk(accessDirectory):
        for file in files:
            accessPath = os.path.join(path, file)
            objectPath = accessPath.replace(accessDirectory, objectsDirectory, 1)
            objectName = os.path.basename(objectPath)
            objectNameExtensionIndex = objectName.rfind(".")
            if objectNameExtensionIndex != -1:
                objectName = objectName[:objectNameExtensionIndex + 1]
            objectNameLike = os.path.join(os.path.dirname(objectPath), objectName).replace(SIPDirectory, "%SIPDirectory%", 1)

            # Look up the matching original object(s) in the database (renamed
            # from "files" to avoid shadowing the os.walk loop variable).
            matching_files = File.objects.filter(
                removedtime__isnull=True,
                currentlocation__startswith=objectNameLike,
                sip_id=SIPUUID,
            )
            if not matching_files.exists():
                print("No corresponding object for:", accessPath.replace(SIPDirectory, "%SIPDirectory%", 1), file=sys.stderr)
                exitCode = 1

            update = []
            for objectUUID, objectPath in matching_files.values_list('uuid', 'currentlocation'):
                objectExtension = objectPath.replace(objectNameLike, "", 1)
                print(objectName[objectNameExtensionIndex + 1:], objectExtension, "\t", end=' ')
                if objectExtension.find(".") != -1:
                    continue
                print(objectName[objectNameExtensionIndex + 1:], objectExtension, "\t", end=' ')

                dipPath = os.path.join(DIPDirectory, "objects", "%s-%s" % (objectUUID, os.path.basename(accessPath)))
                if copy:
                    print("TODO - copy not supported yet")
                else:
                    dest = dipPath
                    rename(accessPath, dest)
                    src = accessPath.replace(SIPDirectory, "%SIPDirectory%")
                    dst = dest.replace(SIPDirectory, "%SIPDirectory%")
                    update.append((src, dst))

            for src, dst in update:
                eventDetail = ""
                eventOutcomeDetailNote = 'moved from="' + src + '"; moved to="' + dst + '"'
                updateFileLocation(src, dst, "movement", date, eventDetail, sipUUID=SIPUUID, eventOutcomeDetailNote=eventOutcomeDetailNote)
    return exitCode
def moveSIP(src, dst, sipUUID, sharedDirectoryPath):
    # Prepare paths
    if src.endswith("/"):
        src = src[:-1]
    dest = dst.replace(sharedDirectoryPath, "%sharedPath%", 1)
    if dest.endswith("/"):
        dest = os.path.join(dest, os.path.basename(src))
    if dest.endswith("/."):
        dest = os.path.join(dest[:-1], os.path.basename(src))

    updateDB(dest + "/", sipUUID)

    # If destination already exists, delete it with warning
    dest_path = os.path.join(dst, os.path.basename(src))
    if os.path.exists(dest_path):
        print(dest_path, 'exists, deleting', file=sys.stderr)
        shutil.rmtree(dest_path)

    rename(src, dst)
def verifyMetsFileSecChecksums(
    job,
    metsFile,
    date,
    taskUUID,
    transferDirectory,
    transferUUID,
    relativeDirectory="./",
):
    job.pyprint(metsFile)
    DSpaceMets = "metadata/submissionDocumentation/DSpaceMets"
    try:
        path = os.path.join(transferDirectory, DSpaceMets)
        if not os.path.isdir(path):
            os.mkdir(path)
    except OSError:
        job.pyprint("error creating DSpaceMets directory.")
    exitCode = 0

    metsDirectory = os.path.basename(os.path.dirname(metsFile))
    if metsDirectory == "DSpace_export":
        outputDirectory = path
    else:
        outputDirectory = os.path.join(path, metsDirectory)
        if not os.path.isdir(outputDirectory):
            os.mkdir(outputDirectory)

    dest = os.path.join(outputDirectory, "mets.xml")
    rename_status = rename(metsFile, dest, printfn=job.pyprint, should_exit=False)
    if rename_status:
        return rename_status

    src = metsFile.replace(transferDirectory, "%transferDirectory%")
    dst = dest.replace(transferDirectory, "%transferDirectory%")
    eventDetail = ""
    eventOutcomeDetailNote = 'moved from="' + src + '"; moved to="' + dst + '"'
    updateFileLocation(
        src,
        dst,
        "movement",
        date,
        eventDetail,
        transferUUID=transferUUID,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
    )
    return exitCode
def moveSIP(job, src, dst, transferUUID, sharedDirectoryPath):
    if src.endswith("/"):
        src = src[:-1]
    dest = dst.replace(sharedDirectoryPath, "%sharedPath%", 1)
    if dest.endswith("/"):
        dest = os.path.join(dest, os.path.basename(src))
    if dest.endswith("/."):
        dest = os.path.join(dest[:-1], os.path.basename(src))
    if os.path.isdir(src):
        dest += "/"
    updateDB(dest, transferUUID)
    return rename(src, dst, printfn=job.pyprint, should_exit=False)
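# How this job-based variant of moveSIP() gets invoked is not shown in this
# excerpt. A minimal, hypothetical entry point in the style of the job-based
# client scripts (the argument order and the use of job.args, job.JobContext(),
# and job.set_status() are assumptions here, not taken from the original file)
# might look like:

def call(jobs):
    for job in jobs:
        with job.JobContext():
            # job.args[0] is assumed to be the script name; the remaining
            # arguments mirror the moveSIP() signature above.
            src, dst, transferUUID, sharedDirectoryPath = job.args[1:5]
            job.set_status(moveSIP(job, src, dst, transferUUID, sharedDirectoryPath))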
def main(job, transfer_uuid, sip_directory, date, task_uuid, delete=False):
    files = File.objects.filter(transfer=transfer_uuid, removedtime__isnull=True)
    if not files:
        job.pyprint('No files found for transfer: ', transfer_uuid)

    transfer_mdl = Transfer.objects.get(uuid=transfer_uuid)

    # We track whether or not anything was extracted because that controls what
    # the next microservice chain link will be.
    # If something was extracted, then a new identification step has to be
    # kicked off on those files; otherwise, we can go ahead with the transfer.
    extracted = False

    for file_ in files:
        try:
            format_id = FileFormatVersion.objects.get(file_uuid=file_.uuid)
        # Can't do anything if the file wasn't identified in the previous step
        except (FileFormatVersion.DoesNotExist, FileFormatVersion.MultipleObjectsReturned):
            job.pyprint('Not extracting contents from',
                        os.path.basename(file_.currentlocation),
                        ' - file format not identified',
                        file=sys.stderr)
            continue
        if format_id.format_version is None:
            job.pyprint('Not extracting contents from',
                        os.path.basename(file_.currentlocation),
                        ' - file format not identified',
                        file=sys.stderr)
            continue

        # Extraction commands are defined in the FPR just like normalization
        # commands
        try:
            command = FPCommand.active.get(
                fprule__format=format_id.format_version,
                fprule__purpose='extract',
                fprule__enabled=True,
            )
        except FPCommand.DoesNotExist:
            job.pyprint('Not extracting contents from',
                        os.path.basename(file_.currentlocation),
                        ' - No rule found to extract',
                        file=sys.stderr)
            continue

        # Check if file has already been extracted
        if already_extracted(file_):
            job.pyprint('Not extracting contents from',
                        os.path.basename(file_.currentlocation),
                        ' - extraction already happened.',
                        file=sys.stderr)
            continue

        output_file_path = file_.currentlocation.replace(TRANSFER_DIRECTORY, sip_directory)

        # Temporarily rename the input package so that when we extract the
        # contents we don't extract it to a directory that will conflict with
        # the names we want to preserve in our PREMIS:originalLocation.
        temp_dir = temporary_directory(output_file_path, date)
        rename(output_file_path, temp_dir)

        # Create the extract packages command.
        if command.script_type == 'command' or command.script_type == 'bashScript':
            args = []
            command_to_execute = command.command.replace('%inputFile%', temp_dir)
            command_to_execute = command_to_execute.replace('%outputDirectory%', output_file_path)
        else:
            command_to_execute = command.command
            args = [temp_dir, output_file_path]

        # Make the command clear to users when inspecting stdin/stdout.
        logger.info("Command to execute is: %s", command_to_execute)
        exitstatus, stdout, stderr = executeOrRun(command.script_type,
                                                  command_to_execute,
                                                  arguments=args,
                                                  printing=True,
                                                  capture_output=True)
        job.write_output(stdout)
        job.write_error(stderr)

        if exitstatus != 0:
            # Dang, looks like the extraction failed
            job.pyprint('Command', command.description, 'failed!', file=sys.stderr)
        else:
            extracted = True
            job.pyprint('Extracted contents from', os.path.basename(output_file_path))

            # Assign UUIDs and insert them into the database, so the newly-
            # extracted files are properly tracked by Archivematica
            for extracted_file in tree(output_file_path):
                assign_uuid(job, extracted_file, file_.uuid, transfer_uuid,
                            date, task_uuid, sip_directory,
                            file_.currentlocation, file_.originallocation,
                            output_file_path)

            if transfer_mdl.diruuids:
                create_extracted_dir_uuids(job, transfer_mdl, output_file_path,
                                           sip_directory, file_)

            # We may want to remove the original package file after extracting
            # its contents
            if delete:
                delete_and_record_package_file(job, temp_dir, file_.uuid,
                                               file_.currentlocation)

    if extracted:
        return 0
    else:
        return 255
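# The tree() helper used above to enumerate newly extracted files is not part
# of this excerpt. Below is a minimal sketch under the assumption that it
# simply walks the extraction directory and yields every file path; the real
# helper may differ.

import os


def tree(root):
    # Yield the full path of every file below root, so each extracted file can
    # be assigned a UUID and registered in the database.
    for dirpath, __, files in os.walk(root):
        for f in files:
            yield os.path.join(dirpath, f)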