def call(jobs):
    """Restructure each SIP named by ``job.args[1]`` into the standard layout.

    For every job: the contents of ``<sip>/data`` are promoted to the SIP
    root, metadata/ and logs/ are lifted out of objects/ when present,
    unexpected top-level entries are tucked into
    metadata/submissionDocumentation (the original METS ends up there), and
    empty directories recorded in that METS are recreated under objects/.
    IOErrors are reported on the job and flagged via a non-zero status.
    """
    for job in jobs:
        with job.JobContext(logger=logger):
            try:
                sip_dir = job.args[1]

                # Promote everything out of the data directory, then drop it.
                data_dir = os.path.join(sip_dir, 'data')
                for entry in os.listdir(data_dir):
                    _move_file(job,
                               os.path.join(data_dir, entry),
                               os.path.join(sip_dir, entry))
                os.rmdir(data_dir)

                # Lift metadata and logs out of objects/ if they exist;
                # best-effort, hence exit_on_error=False.
                objects_dir = os.path.join(sip_dir, 'objects')
                for leaf in ('metadata', 'logs'):
                    _move_file(job,
                               os.path.join(objects_dir, leaf),
                               os.path.join(sip_dir, leaf),
                               exit_on_error=False)

                # Move anything unexpected to submission documentation,
                # leaving the expected SIP structure (objects, metadata,
                # etc.) in place. The original METS lands there as well.
                submission_docs = os.path.join(
                    sip_dir, 'metadata', 'submissionDocumentation')
                os.makedirs(submission_docs)
                expected = (archivematicaFunctions.OPTIONAL_FILES +
                            archivematicaFunctions.REQUIRED_DIRECTORIES)
                mets_path = None
                for entry in os.listdir(sip_dir):
                    # Leave SIP structure
                    if entry in expected:
                        continue
                    target = os.path.join(submission_docs, entry)
                    if entry.startswith('METS.') and entry.endswith('.xml'):
                        mets_path = target
                    _move_file(job, os.path.join(sip_dir, entry), target)

                # Reconstruct any empty directories documented in the METS
                # file under the logical structMap labelled "Normative
                # Directory Structure"
                if mets_path:
                    archivematicaFunctions.reconstruct_empty_directories(
                        mets_path, objects_dir, logger=logger)
                else:
                    logger.info(
                        'Unable to reconstruct empty directories: no METS file'
                        ' could be found in {}'.format(sip_dir))

                archivematicaFunctions.create_structured_directory(
                    sip_dir, manual_normalization=True, printing=True,
                    printfn=job.pyprint)
            except IOError as err:
                job.print_error(repr(err))
                job.set_status(1)
def restructure_transfer(unit_path):
    """Bring the transfer at ``unit_path`` into the standard layout.

    Creates the required directory skeleton, then relocates every other
    entry into objects/ — directories unless they are required transfer
    directories, files unless they are optional top-level files.
    """
    # Create required directories
    create_structured_directory(unit_path, printing=True)

    # Everything else goes into the objects directory.
    target = os.path.join(unit_path, "objects", '.')
    for entry in os.listdir(unit_path):
        entry_path = os.path.join(unit_path, entry)
        movable_dir = (os.path.isdir(entry_path) and
                       entry not in REQUIRED_DIRECTORIES)
        movable_file = (os.path.isfile(entry_path) and
                        entry not in OPTIONAL_FILES)
        if movable_dir or movable_file:
            _move_file(entry_path, target)
def call(jobs):
    """Create one SIP per container in a TRIM transfer's objects directory.

    Each top-level directory under the transfer's objects directory becomes
    its own SIP, structured for manual normalization: the container's
    contents are moved in, the transfer's File rows for that container are
    re-pointed at the new SIP, and the SIP is moved to the auto-process SIP
    directory. Non-directory entries are reported and skipped.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                objectsDirectory = job.args[1]
                transferName = job.args[2]
                transferUUID = job.args[3]
                processingDirectory = job.args[4]
                autoProcessSIPDirectory = job.args[5]
                # Fix: job arguments live in ``job.args`` (as every other
                # index above shows); ``job.argv`` does not exist and raised
                # AttributeError before any container was processed.
                sharedPath = job.args[6]
                transfer_objects_directory = '%transferDirectory%objects'
                for container in os.listdir(objectsDirectory):
                    # str() is the idiomatic spelling of .__str__()
                    sipUUID = str(uuid.uuid4())
                    containerPath = os.path.join(objectsDirectory, container)
                    if not os.path.isdir(containerPath):
                        job.pyprint("file (not container) found: ", container,
                                    file=sys.stderr)
                        continue
                    sipName = "%s-%s" % (transferName, container)
                    tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
                    destSIPDir = os.path.join(autoProcessSIPDirectory, sipName) + "/"
                    archivematicaFunctions.create_structured_directory(
                        tmpSIPDir, manual_normalization=True)
                    databaseFunctions.createSIP(
                        destSIPDir.replace(sharedPath, '%sharedPath%'),
                        sipUUID, printfn=job.pyprint)

                    # move the objects to the SIPDir
                    for item in os.listdir(containerPath):
                        shutil.move(os.path.join(containerPath, item),
                                    os.path.join(tmpSIPDir, "objects", item))

                    # get the database list of files in the objects directory
                    # for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
                    directory = os.path.join(transfer_objects_directory,
                                             container)
                    files = File.objects.filter(
                        removedtime__isnull=True,
                        currentlocation__startswith=directory,
                        transfer_id=transferUUID)
                    for f in files:
                        currentPath = databaseFunctions.deUnicode(
                            f.currentlocation).replace(
                                directory, transfer_objects_directory)
                        currentSIPFilePath = currentPath.replace(
                            "%transferDirectory%", tmpSIPDir)
                        if os.path.isfile(currentSIPFilePath):
                            f.currentlocation = currentPath.replace(
                                "%transferDirectory%", "%SIPDirectory%")
                            f.sip_id = sipUUID
                            f.save()
                        else:
                            job.pyprint("file not found: ",
                                        currentSIPFilePath, file=sys.stderr)

                    # moveSIPTo autoProcessSIPDirectory
                    shutil.move(tmpSIPDir, destSIPDir)
# NOTE(review): orphan fragment of a legacy SIP-restructure routine — the
# enclosing ``def`` and the ``for item in os.listdir(...'data')`` header
# that bound ``sip_path`` and ``item`` for the first three statements are
# missing from this chunk. Compare the complete job-based version of this
# logic elsewhere in the file. Kept byte-for-byte; TODO confirm whether
# this dead fragment can be deleted.
src = os.path.join(sip_path, 'data', item)
dst = os.path.join(sip_path, item)
_move_file(src, dst)
# Remove the (now empty) data directory.
os.rmdir(os.path.join(sip_path, 'data'))
# Move metadata and logs out of objects if they exist
src = os.path.join(sip_path, 'objects', 'metadata')
dst = os.path.join(sip_path, 'metadata')
_move_file(src, dst, exit_on_error=False)
src = os.path.join(sip_path, 'objects', 'logs')
dst = os.path.join(sip_path, 'logs')
_move_file(src, dst, exit_on_error=False)
# Move anything unexpected to submission documentation
# Leave objects, metadata, etc
# Original METS ends up in submissionDocumentation
os.makedirs(os.path.join(sip_path, 'metadata', 'submissionDocumentation'))
for item in os.listdir(sip_path):
    # Leave SIP structure
    if item in archivematicaFunctions.OPTIONAL_FILES + archivematicaFunctions.REQUIRED_DIRECTORIES:
        continue
    src = os.path.join(sip_path, item)
    dst = os.path.join(sip_path, 'metadata', 'submissionDocumentation', item)
    _move_file(src, dst)
# Build the required SIP directory skeleton last.
archivematicaFunctions.create_structured_directory(
    sip_path, manual_normalization=True, printing=True)
# NOTE(review): orphan Python-2 fragment (note the ``print >>`` statement)
# of a legacy script-style version of the TRIM container-to-SIP logic; the
# enclosing scope that bound ``objectsDirectory``, ``transferName``,
# ``processingDirectory`` and ``transferUUID`` is missing from this chunk,
# and the final ``for f in files:`` loop body is truncated. Kept
# byte-for-byte; TODO confirm whether this dead fragment can be deleted.
autoProcessSIPDirectory = sys.argv[5]
sharedPath = sys.argv[6]
transfer_objects_directory = '%transferDirectory%objects'
for container in os.listdir(objectsDirectory):
    sipUUID = uuid.uuid4().__str__()
    containerPath = os.path.join(objectsDirectory, container)
    if not os.path.isdir(containerPath):
        # Python 2 print-chevron syntax: writes the message to stderr.
        print >> sys.stderr, "file (not container) found: ", container
        continue
    sipName = "%s-%s" % (transferName, container)
    tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
    destSIPDir = os.path.join(autoProcessSIPDirectory, sipName) + "/"
    archivematicaFunctions.create_structured_directory(
        tmpSIPDir, manual_normalization=True)
    databaseFunctions.createSIP(
        destSIPDir.replace(sharedPath, '%sharedPath%'), sipUUID)
    # move the objects to the SIPDir
    for item in os.listdir(containerPath):
        shutil.move(os.path.join(containerPath, item),
                    os.path.join(tmpSIPDir, "objects", item))
    # get the database list of files in the objects directory
    # for each file, confirm it's in the SIP objects directory, and update the current location/ owning SIP'
    directory = os.path.join(transfer_objects_directory, container)
    files = File.objects.filter(removedtime__isnull=True,
                                currentlocation__startswith=directory,
                                transfer_id=transferUUID)
    for f in files:
        # NOTE(review): loop body missing — truncated in this fragment.
def restructure_transfer_aip(unit_path):
    """
    Restructure a transfer that comes from re-ingesting an Archivematica AIP.

    The incoming bag is parked in ``old_bag/``; its payload is then shuffled
    into the standard transfer layout (logs/, metadata/, objects/), the AIP
    METS file is kept under metadata/, and any empty directories recorded in
    the METS logical structMap are recreated under objects/.
    """
    old_bag = os.path.join(unit_path, 'old_bag', '')
    os.makedirs(old_bag)

    # Move everything to old_bag
    for item in os.listdir(unit_path):
        if item == 'old_bag':
            continue
        src = os.path.join(unit_path, item)
        _move_file(src, old_bag)

    # Create required directories
    # - "/logs" and "/logs/fileMeta"
    # - "/metadata" and "/metadata/submissionDocumentation"
    # - "/objects"
    create_structured_directory(unit_path, printing=True)

    # Move /old_bag/data/METS.<UUID>.xml => /metadata/METS.<UUID>.xml
    p = re.compile(r'^METS\..*\.xml$', re.IGNORECASE)
    src = os.path.join(old_bag, 'data')
    mets_name = None
    for item in os.listdir(src):
        if p.match(item):
            mets_name = item
            break  # Stop trying after the first match
    if mets_name is None:
        # Fix: previously this fell through and crashed with an obscure
        # AttributeError/NameError on ``m.group()``; fail explicitly so the
        # problem is visible in the logs.
        raise FileNotFoundError(
            'No METS file could be found in {}'.format(src))
    src = os.path.join(src, mets_name)
    dst = os.path.join(unit_path, 'metadata')
    # Fix: record the full path of the relocated METS *file*, not the
    # metadata directory itself — reconstruct_empty_directories() below
    # expects a METS file path (the sibling SIP-restructure code likewise
    # stores the destination file path).
    mets_file_path = os.path.join(dst, mets_name)
    _move_file(src, dst)

    # Move /old_bag/data/objects/metadata/* => /metadata/
    src = os.path.join(old_bag, 'data', 'objects', 'metadata')
    dst = os.path.join(unit_path, 'metadata')
    if os.path.isdir(src):
        for item in os.listdir(src):
            item_path = os.path.join(src, item)
            _move_file(item_path, dst)
        shutil.rmtree(src)

    # Move /old_bag/data/objects/submissionDocumentation/* => /metadata/submissionDocumentation/
    src = os.path.join(old_bag, 'data', 'objects', 'submissionDocumentation')
    dst = os.path.join(unit_path, 'metadata', 'submissionDocumentation')
    for item in os.listdir(src):
        item_path = os.path.join(src, item)
        _move_file(item_path, dst)
    shutil.rmtree(src)

    # Move /old_bag/data/objects/* => /objects/
    src = os.path.join(old_bag, 'data', 'objects')
    objects_path = dst = os.path.join(unit_path, 'objects')
    for item in os.listdir(src):
        item_path = os.path.join(src, item)
        _move_file(item_path, dst)

    # Move /old_bag/processingMCP.xml => /processingMCP.xml
    src = os.path.join(old_bag, 'processingMCP.xml')
    dst = os.path.join(unit_path, 'processingMCP.xml')
    if os.path.isfile(src):
        _move_file(src, dst)

    # Get rid of old_bag
    shutil.rmtree(old_bag)

    # Reconstruct any empty directories documented in the METS file under the
    # logical structMap labelled "Normative Directory Structure"
    reconstruct_empty_directories(mets_file_path, objects_path, logger=logger)
def call(jobs):
    """Create a SIP from a completed transfer's objects directory.

    For each job: a SIP working directory is structured, the transfer's
    objects are moved into it, the Directory and File rows owned by the
    transfer are re-pointed at the (new or pre-existing) SIP, ancillary
    metadata files are copied across, and the SIP is moved to the
    auto-process SIP directory. For re-ingested AIPs the original AIP UUID
    (parsed from the reingest METS filename) is reused as the SIP UUID.
    Everything runs inside one database transaction.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                objectsDirectory = job.args[1]
                transferName = job.args[2]
                transferUUID = job.args[3]
                processingDirectory = job.args[4]
                autoProcessSIPDirectory = job.args[5]
                sharedPath = job.args[6]
                sipName = transferName

                tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
                destSIPDir = os.path.join(autoProcessSIPDirectory, sipName) + "/"
                archivematicaFunctions.create_structured_directory(
                    tmpSIPDir, manual_normalization=False)

                # If transfer is a reingested AIP, then pass that info to the SIP
                sip_type = "SIP"
                sip_uuid = None
                transfer = Transfer.objects.get(uuid=transferUUID)
                if transfer.type == "Archivematica AIP":
                    sip_type = "AIP-REIN"
                    # Use reingested AIP's UUID as the SIP UUID
                    # Get AIP UUID from reingest METS name
                    job.pyprint(
                        "path",
                        os.path.join(objectsDirectory, "..", "metadata"),
                        "listdir",
                        os.listdir(
                            os.path.join(objectsDirectory, "..", "metadata")),
                    )
                    # NOTE(review): no break — if several METS* entries
                    # exist the last one wins; presumably there is at most
                    # one. TODO confirm.
                    for item in os.listdir(
                            os.path.join(objectsDirectory, "..", "metadata")):
                        if item.startswith("METS"):
                            sip_uuid = item.replace("METS.", "").replace(".xml", "")
                job.pyprint("sip_uuid", sip_uuid)
                job.pyprint("sip_type", sip_type)

                # Find out if any ``Directory`` models were created for the source
                # ``Transfer``. If so, this fact gets recorded in the new ``SIP`` model.
                dir_mdls = Directory.objects.filter(
                    transfer_id=transferUUID,
                    currentlocation__startswith="%transferDirectory%objects",
                )
                # len() evaluates (and caches) the queryset, which is also
                # iterated further below.
                diruuids = len(dir_mdls) > 0

                # Create row in SIPs table if one doesn't already exist
                lookup_path = destSIPDir.replace(sharedPath, "%sharedPath%")
                try:
                    sip = SIP.objects.get(currentpath=lookup_path)
                    if diruuids:
                        sip.diruuids = True
                        sip.save()
                except SIP.DoesNotExist:
                    sip_uuid = databaseFunctions.createSIP(
                        lookup_path,
                        UUID=sip_uuid,
                        sip_type=sip_type,
                        diruuids=diruuids,
                        printfn=job.pyprint,
                    )
                    sip = SIP.objects.get(uuid=sip_uuid)

                # Set activeAgent using the value in Transfer. This ensures
                # that events generated in Ingest can fall back to this value
                # in scenarios where the processing config does not require
                # user interactions, e.g. in the "automated" processing
                # config.
                try:
                    unit_variable = UnitVariable.objects.get(
                        unittype="Transfer",
                        unituuid=transferUUID,
                        variable="activeAgent",
                    )
                except UnitVariable.DoesNotExist:
                    unit_variable = None
                if unit_variable:
                    try:
                        agent = Agent.objects.get(
                            id=unit_variable.variablevalue)
                    except Agent.DoesNotExist:
                        # Best-effort: a missing agent simply leaves the
                        # SIP's active agent unset.
                        pass
                    else:
                        # NOTE(review): assumes the agent has an associated
                        # userprofile — TODO confirm against the Agent model.
                        sip.update_active_agent(agent.userprofile.user_id)

                # Move the objects to the SIPDir
                for item in os.listdir(objectsDirectory):
                    src_path = os.path.join(objectsDirectory, item)
                    dst_path = os.path.join(tmpSIPDir, "objects", item)
                    # If dst_path already exists and is a directory, shutil.move
                    # will move src_path into it rather than overwriting it;
                    # to avoid incorrectly-nested paths, move src_path's contents
                    # into it instead.
                    if os.path.exists(dst_path) and os.path.isdir(src_path):
                        for subitem in os.listdir(src_path):
                            shutil.move(os.path.join(src_path, subitem), dst_path)
                    else:
                        shutil.move(src_path, dst_path)

                # Get the ``Directory`` models representing the subdirectories in the
                # objects/ directory. For each subdirectory, confirm it's in the SIP
                # objects/ directory, and update the current location and owning SIP.
                for dir_mdl in dir_mdls:
                    currentPath = databaseFunctions.deUnicode(
                        dir_mdl.currentlocation)
                    currentSIPDirPath = currentPath.replace(
                        "%transferDirectory%", tmpSIPDir)
                    if os.path.isdir(currentSIPDirPath):
                        dir_mdl.currentlocation = currentPath.replace(
                            "%transferDirectory%", "%SIPDirectory%")
                        dir_mdl.sip = sip
                        dir_mdl.save()
                    else:
                        job.pyprint("directory not found: ",
                                    currentSIPDirPath, file=sys.stderr)

                # Get the database list of files in the objects directory.
                # For each file, confirm it's in the SIP objects directory, and update the
                # current location/ owning SIP'
                files = File.objects.filter(
                    transfer_id=transferUUID,
                    currentlocation__startswith="%transferDirectory%objects",
                    removedtime__isnull=True,
                )
                for f in files:
                    currentPath = databaseFunctions.deUnicode(
                        f.currentlocation)
                    currentSIPFilePath = currentPath.replace(
                        "%transferDirectory%", tmpSIPDir)
                    if os.path.isfile(currentSIPFilePath):
                        f.currentlocation = currentPath.replace(
                            "%transferDirectory%", "%SIPDirectory%")
                        f.sip = sip
                        f.save()
                    else:
                        job.pyprint("file not found: ", currentSIPFilePath,
                                    file=sys.stderr)

                # Pre-create the manual-normalization directory skeleton
                # inside the SIP.
                archivematicaFunctions.create_directories(
                    archivematicaFunctions.MANUAL_NORMALIZATION_DIRECTORIES,
                    basepath=tmpSIPDir,
                )

                # Copy the JSON metadata file, if present; this contains a
                # serialized copy of DC metadata entered in the dashboard UI
                # during the transfer.
                src = os.path.normpath(
                    os.path.join(objectsDirectory, "..", "metadata", "dc.json"))
                dst = os.path.join(tmpSIPDir, "metadata", "dc.json")
                if os.path.exists(src):
                    shutil.copy(src, dst)

                # Copy processingMCP.xml file
                # (objectsDirectory[:-1] strips the trailing slash so
                # dirname yields the transfer root)
                src = os.path.join(os.path.dirname(objectsDirectory[:-1]),
                                   "processingMCP.xml")
                dst = os.path.join(tmpSIPDir, "processingMCP.xml")
                shutil.copy(src, dst)

                # moveSIPTo autoProcessSIPDirectory
                shutil.move(tmpSIPDir, destSIPDir)
def call(jobs):
    """Create a SIP from a completed transfer's objects directory.

    For each job: a SIP working directory is structured, the transfer's
    objects are moved into it, the Directory and File rows owned by the
    transfer are re-pointed at the (new or pre-existing) SIP, ancillary
    metadata files are copied across, and the SIP is moved to the
    auto-process SIP directory. For re-ingested AIPs the original AIP UUID
    (parsed from the reingest METS filename) is reused as the SIP UUID.
    Everything runs inside one database transaction.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                objectsDirectory = job.args[1]
                transferName = job.args[2]
                transferUUID = job.args[3]
                processingDirectory = job.args[4]
                autoProcessSIPDirectory = job.args[5]
                sharedPath = job.args[6]
                sipName = transferName

                tmpSIPDir = os.path.join(processingDirectory, sipName) + "/"
                destSIPDir = os.path.join(autoProcessSIPDirectory, sipName) + "/"
                archivematicaFunctions.create_structured_directory(
                    tmpSIPDir, manual_normalization=False)

                # If transfer is a reingested AIP, then pass that info to the SIP
                sip_type = 'SIP'
                sip_uuid = None
                transfer = Transfer.objects.get(uuid=transferUUID)
                if transfer.type == 'Archivematica AIP':
                    sip_type = 'AIP-REIN'
                    # Use reingested AIP's UUID as the SIP UUID
                    # Get AIP UUID from reingest METS name
                    job.pyprint(
                        'path',
                        os.path.join(objectsDirectory, '..', 'metadata'),
                        'listdir',
                        os.listdir(
                            os.path.join(objectsDirectory, '..', 'metadata')))
                    for item in os.listdir(
                            os.path.join(objectsDirectory, '..', 'metadata')):
                        if item.startswith('METS'):
                            sip_uuid = item.replace('METS.', '').replace('.xml', '')
                job.pyprint('sip_uuid', sip_uuid)
                job.pyprint('sip_type', sip_type)

                # Find out if any ``Directory`` models were created for the source
                # ``Transfer``. If so, this fact gets recorded in the new ``SIP`` model.
                dir_mdls = Directory.objects.filter(
                    transfer_id=transferUUID,
                    currentlocation__startswith='%transferDirectory%objects')
                diruuids = len(dir_mdls) > 0

                # Create row in SIPs table if one doesn't already exist
                lookup_path = destSIPDir.replace(sharedPath, '%sharedPath%')
                try:
                    # Fix: keep the SIP model instance itself rather than its
                    # ``.uuid`` — the code below sets ``sip.diruuids``, calls
                    # ``sip.save()`` and assigns ``sip`` to the ``sip``
                    # foreign keys of Directory/File rows, all of which need
                    # the model object (a bare UUID raised AttributeError and
                    # broke the FK assignments).
                    sip = SIP.objects.get(currentpath=lookup_path)
                    if diruuids:
                        sip.diruuids = True
                        sip.save()
                except SIP.DoesNotExist:
                    sip_uuid = databaseFunctions.createSIP(lookup_path,
                                                           UUID=sip_uuid,
                                                           sip_type=sip_type,
                                                           diruuids=diruuids,
                                                           printfn=job.pyprint)
                    sip = SIP.objects.get(uuid=sip_uuid)

                # Move the objects to the SIPDir
                for item in os.listdir(objectsDirectory):
                    src_path = os.path.join(objectsDirectory, item)
                    dst_path = os.path.join(tmpSIPDir, "objects", item)
                    # If dst_path already exists and is a directory, shutil.move
                    # will move src_path into it rather than overwriting it;
                    # to avoid incorrectly-nested paths, move src_path's contents
                    # into it instead.
                    if os.path.exists(dst_path) and os.path.isdir(src_path):
                        for subitem in os.listdir(src_path):
                            shutil.move(os.path.join(src_path, subitem), dst_path)
                    else:
                        shutil.move(src_path, dst_path)

                # Get the ``Directory`` models representing the subdirectories in the
                # objects/ directory. For each subdirectory, confirm it's in the SIP
                # objects/ directory, and update the current location and owning SIP.
                for dir_mdl in dir_mdls:
                    currentPath = databaseFunctions.deUnicode(
                        dir_mdl.currentlocation)
                    currentSIPDirPath = currentPath.replace(
                        "%transferDirectory%", tmpSIPDir)
                    if os.path.isdir(currentSIPDirPath):
                        dir_mdl.currentlocation = currentPath.replace(
                            "%transferDirectory%", "%SIPDirectory%")
                        dir_mdl.sip = sip
                        dir_mdl.save()
                    else:
                        job.pyprint("directory not found: ",
                                    currentSIPDirPath, file=sys.stderr)

                # Get the database list of files in the objects directory.
                # For each file, confirm it's in the SIP objects directory, and update the
                # current location/ owning SIP'
                files = File.objects.filter(
                    transfer_id=transferUUID,
                    currentlocation__startswith='%transferDirectory%objects',
                    removedtime__isnull=True)
                for f in files:
                    currentPath = databaseFunctions.deUnicode(
                        f.currentlocation)
                    currentSIPFilePath = currentPath.replace(
                        "%transferDirectory%", tmpSIPDir)
                    if os.path.isfile(currentSIPFilePath):
                        f.currentlocation = currentPath.replace(
                            "%transferDirectory%", "%SIPDirectory%")
                        f.sip = sip
                        f.save()
                    else:
                        job.pyprint("file not found: ", currentSIPFilePath,
                                    file=sys.stderr)

                # Pre-create the manual-normalization directory skeleton
                # inside the SIP.
                archivematicaFunctions.create_directories(
                    archivematicaFunctions.MANUAL_NORMALIZATION_DIRECTORIES,
                    basepath=tmpSIPDir)

                # Copy the JSON metadata file, if present; this contains a
                # serialized copy of DC metadata entered in the dashboard UI
                # during the transfer.
                src = os.path.normpath(
                    os.path.join(objectsDirectory, "..", "metadata", "dc.json"))
                dst = os.path.join(tmpSIPDir, "metadata", "dc.json")
                if os.path.exists(src):
                    shutil.copy(src, dst)

                # Copy processingMCP.xml file
                src = os.path.join(os.path.dirname(objectsDirectory[:-1]),
                                   "processingMCP.xml")
                dst = os.path.join(tmpSIPDir, "processingMCP.xml")
                shutil.copy(src, dst)

                # moveSIPTo autoProcessSIPDirectory
                shutil.move(tmpSIPDir, destSIPDir)