def _get_arrange_directory_tree(backlog_uuid, original_path, arrange_path):
    """
    Fetches all the children of original_path from backlog_uuid and creates
    an identical tree in arrange_path.

    Helper function for copy_to_arrange.

    :param backlog_uuid: UUID passed to storage_service.browse_location.
    :param original_path: Path to browse; joined with each child name.
    :param arrange_path: Path under which the mirrored entries are placed.
    :returns: List of dicts with the keys ``original_path``,
        ``arrange_path``, ``file_uuid`` and ``transfer_uuid``.  Directory
        entries carry ``None`` for everything except ``arrange_path``.
    :raises storage_service.ResourceNotFound: Re-raised (after logging a
        warning) when the metadata lookup for a file fails.
    """
    # TODO Use ElasticSearch, since that's where we're getting the original
    # info from now?  Could be easier to get file UUID that way
    ret = []
    browse = storage_service.browse_location(backlog_uuid, original_path)

    # Add everything that is not a directory (ie that is a file)
    entries = [e for e in browse["entries"] if e not in browse["directories"]]
    for entry in entries:
        # Compare by equality: `entry not in ("processingMCP.xml")` is a
        # substring test against a plain string (the parentheses do not make
        # a tuple), which would also skip names such as "MCP.xml".
        if entry == "processingMCP.xml":
            continue

        path = os.path.join(original_path, entry)
        # Must be a plain string; a trailing comma here would turn it into a
        # 1-tuple and break the metadata lookup and the log message.
        relative_path = path.replace(DEFAULT_BACKLOG_PATH, "", 1)
        try:
            file_info = storage_service.get_file_metadata(
                relative_path=relative_path
            )[0]
        except storage_service.ResourceNotFound:
            logger.warning(
                "No file information returned from the Storage Service for file at relative_path: %s",
                relative_path,
            )
            raise
        file_uuid = file_info["fileuuid"]
        transfer_uuid = file_info["sipuuid"]
        ret.append(
            {
                "original_path": path,
                "arrange_path": os.path.join(arrange_path, entry),
                "file_uuid": file_uuid,
                "transfer_uuid": transfer_uuid,
            }
        )

    # Add directories and recurse, adding their children too
    for directory in browse["directories"]:
        # Joining with "" guarantees a trailing separator on both paths
        original_dir = os.path.join(original_path, directory, "")
        arrange_dir = os.path.join(arrange_path, directory, "")
        # Don't fetch metadata or logs dirs
        # TODO only filter if the children of a SIP ie /arrange/sipname/metadata
        if directory not in ("metadata", "logs"):
            ret.append(
                {
                    "original_path": None,
                    "arrange_path": arrange_dir,
                    "file_uuid": None,
                    "transfer_uuid": None,
                }
            )
            ret.extend(
                _get_arrange_directory_tree(backlog_uuid, original_dir, arrange_dir)
            )

    return ret
def _get_arrange_directory_tree(backlog_uuid, original_path, arrange_path):
    """
    Fetches all the children of original_path from backlog_uuid and creates
    an identical tree in arrange_path.

    Helper function for copy_to_arrange.

    :param backlog_uuid: UUID passed to storage_service.browse_location.
    :param original_path: Path to browse; joined with each child name.
    :param arrange_path: Path under which the mirrored entries are placed.
    :returns: List of dicts with the keys 'original_path', 'arrange_path',
        'file_uuid' and 'transfer_uuid'.  Directory entries carry None for
        everything except 'arrange_path'.
    :raises storage_service.ResourceNotFound: Re-raised (after logging a
        warning) when the metadata lookup for a file fails.
    """
    # TODO Use ElasticSearch, since that's where we're getting the original
    # info from now?  Could be easier to get file UUID that way
    ret = []
    browse = storage_service.browse_location(backlog_uuid, original_path)

    # Add everything that is not a directory (ie that is a file)
    entries = [e for e in browse['entries'] if e not in browse['directories']]
    for entry in entries:
        # Compare by equality: `entry not in ('processingMCP.xml')` is a
        # substring test against a plain string (the parentheses do not make
        # a tuple), which would also skip names such as 'MCP.xml'.
        if entry == 'processingMCP.xml':
            continue

        path = os.path.join(original_path, entry)
        relative_path = path.replace(DEFAULT_BACKLOG_PATH, '', 1)
        try:
            file_info = storage_service.get_file_metadata(
                relative_path=relative_path)[0]
        except storage_service.ResourceNotFound:
            logger.warning(
                'No file information returned from the Storage Service for file at relative_path: %s',
                relative_path)
            raise
        file_uuid = file_info['fileuuid']
        transfer_uuid = file_info['sipuuid']
        ret.append({
            'original_path': path,
            'arrange_path': os.path.join(arrange_path, entry),
            'file_uuid': file_uuid,
            'transfer_uuid': transfer_uuid
        })

    # Add directories and recurse, adding their children too
    for directory in browse['directories']:
        # Joining with '' guarantees a trailing separator on both paths
        original_dir = os.path.join(original_path, directory, '')
        arrange_dir = os.path.join(arrange_path, directory, '')
        # Don't fetch metadata or logs dirs
        # TODO only filter if the children of a SIP ie /arrange/sipname/metadata
        if directory not in ('metadata', 'logs'):
            ret.append({
                'original_path': None,
                'arrange_path': arrange_dir,
                'file_uuid': None,
                'transfer_uuid': None
            })
            ret.extend(
                _get_arrange_directory_tree(backlog_uuid, original_dir,
                                            arrange_dir))

    return ret
def copy_to_arrange(request):
    """
    Add files from backlog to in-progress SIPs being arranged.

    Both inputs are read from base64-encoded POST parameters:

    filepath: path relative to this pipelines backlog.  Leading '/'s are
        stripped.
    destination: path within arrange folder, should start with
        DEFAULT_ARRANGE_PATH ('/arrange/').

    Returns the result of helpers.json_response: a 'message' entry, plus
    'error': True when validation failed.  A
    storage_service.ResourceNotFound raised while looking up a single file
    is logged and re-raised.
    """
    error = None
    sourcepath = base64.b64decode(request.POST.get('filepath', '')).lstrip('/')
    destination = base64.b64decode(request.POST.get('destination', ''))
    logging.info('copy_to_arrange: sourcepath: %s', sourcepath)
    logging.info('copy_to_arrange: destination: %s', destination)

    # Lots of error checking.  The first failing check wins so that a later,
    # less specific check cannot overwrite the message (previously a blank
    # destination was always reported as "must be in arrange directory").
    if not sourcepath or not destination:
        error = "GET parameter 'filepath' or 'destination' was blank."
    elif not destination.startswith(DEFAULT_ARRANGE_PATH):
        error = '{} must be in arrange directory.'.format(destination)
    else:
        # If drop onto a file, drop it into its parent directory instead
        if not destination.endswith('/'):
            destination = os.path.dirname(destination)
        # Files cannot go into the top level folder
        if destination == DEFAULT_ARRANGE_PATH and not sourcepath.endswith('/'):
            error = '{} must go in a SIP, cannot be dropped onto {}'.format(
                sourcepath, DEFAULT_ARRANGE_PATH)

    # Create new SIPArrange entry for each object being copied over
    if error is None:
        # IDEA memoize the backlog location?
        backlog_uuid = storage_service.get_location(purpose='BL')[0]['uuid']
        to_add = []

        # Construct the base arrange_path differently for files vs folders
        if sourcepath.endswith('/'):
            # sourcepath ends with '/', so the last split item is '' and the
            # directory name is the one before it
            leaf_dir = sourcepath.split('/')[-2]
            # If dragging objects/ folder, actually move the contents of (not
            # the folder itself)
            if leaf_dir == 'objects':
                arrange_path = os.path.join(destination, '')
            else:
                # Strip UUID from transfer name
                uuid_regex = r'-[\w]{8}(-[\w]{4}){3}-[\w]{12}$'
                leaf_dir = re.sub(uuid_regex, '', leaf_dir)
                arrange_path = os.path.join(destination, leaf_dir) + '/'
                # Entry for the directory itself
                to_add.append({
                    'original_path': None,
                    'arrange_path': arrange_path,
                    'file_uuid': None,
                    'transfer_uuid': None,
                })
            to_add.extend(
                _get_arrange_directory_tree(backlog_uuid, sourcepath,
                                            arrange_path))
        else:
            arrange_path = os.path.join(destination,
                                        os.path.basename(sourcepath))
            relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, '', 1)
            try:
                file_info = storage_service.get_file_metadata(
                    relative_path=relative_path)[0]
            except storage_service.ResourceNotFound:
                logging.warning(
                    'No file information returned from the Storage Service for file at relative_path: %s',
                    relative_path)
                raise
            file_uuid = file_info.get('fileuuid')
            transfer_uuid = file_info.get('sipuuid')
            to_add.append({
                'original_path': sourcepath,
                'arrange_path': arrange_path,
                'file_uuid': file_uuid,
                'transfer_uuid': transfer_uuid,
            })

        logging.info('copy_to_arrange: arrange_path: %s', arrange_path)
        logging.debug('copy_to_arrange: files to be added: %s', to_add)

        for entry in to_add:
            try:
                # TODO enforce uniqueness on arrange panel?
                models.SIPArrange.objects.create(
                    original_path=entry['original_path'],
                    arrange_path=entry['arrange_path'],
                    file_uuid=entry['file_uuid'],
                    transfer_uuid=entry['transfer_uuid'],
                )
            except IntegrityError:
                # FIXME Expecting this to catch duplicate original_paths, which
                # we want to ignore since a file can only be in one SIP.  Needs
                # to be updated not to ignore other classes of IntegrityErrors.
                logging.exception('Integrity error inserting: %s', entry)

    if error is not None:
        response = {
            'message': error,
            'error': True,
        }
    else:
        response = {'message': 'Files added to the SIP.'}

    return helpers.json_response(response)
def _copy_files_to_arrange(
    sourcepath, destination, fetch_children=False, backlog_uuid=None
):
    """
    Build the list of arrange entries needed to copy sourcepath from the
    backlog into destination.

    :param sourcepath: Path of the file or directory to copy; leading '/'s
        are stripped.  A trailing '/' marks it as a directory.
    :param destination: Target path; must start with DEFAULT_ARRANGE_PATH.
    :param fetch_children: When true and sourcepath is a directory, also
        include its children via _get_arrange_directory_tree.
    :param backlog_uuid: UUID of the backlog location; looked up from the
        storage service when not given.
    :returns: List of dicts with the keys ``original_path``,
        ``arrange_path``, ``file_uuid`` and ``transfer_uuid``.
    :raises ValueError: If a parameter is blank, the destination is outside
        the arrange directory, a file is dropped on the top-level arrange
        folder, or the storage service reports ResourceNotFound.
    """
    sourcepath = sourcepath.lstrip("/")  # starts with 'originals/', not '/originals/'

    # Lots of error checking:
    if not sourcepath or not destination:
        raise ValueError(_("GET parameter 'filepath' or 'destination' was blank."))
    if not destination.startswith(DEFAULT_ARRANGE_PATH):
        raise ValueError(
            _("%(path)s must be in arrange directory.") % {"path": destination}
        )

    # Name of the second-to-last path component; absent when sourcepath
    # contains no '/'
    try:
        leaf_dir = sourcepath.split("/")[-2]
    except IndexError:
        leaf_dir = ""
    # Files cannot go into the top level folder,
    # and neither can the "objects" directory
    # NOTE(review): as written, the condition lets anything whose leaf_dir is
    # "objects" through rather than rejecting it -- confirm intent.
    if destination == DEFAULT_ARRANGE_PATH and not (
        sourcepath.endswith("/") or leaf_dir == "objects"
    ):
        raise ValueError(
            _("%(path1)s must go in a SIP, cannot be dropped onto %(path2)s")
            % {"path1": sourcepath, "path2": DEFAULT_ARRANGE_PATH}
        )

    # Build one entry for each object being copied over
    if not backlog_uuid:
        backlog_uuid = storage_service.get_location(purpose="BL")[0]["uuid"]
    to_add = []

    # Construct the base arrange_path differently for files vs folders
    if sourcepath.endswith("/"):
        # If dragging objects/ folder, actually move the contents of (not
        # the folder itself)
        if leaf_dir == "objects":
            arrange_path = os.path.join(destination, "")
        else:
            # Strip UUID from transfer name
            uuid_regex = r"-[\w]{8}(-[\w]{4}){3}-[\w]{12}$"
            leaf_dir = re.sub(uuid_regex, "", leaf_dir)
            arrange_path = os.path.join(destination, leaf_dir) + "/"
            # Entry for the directory itself
            to_add.append(
                {
                    "original_path": None,
                    "arrange_path": arrange_path,
                    "file_uuid": None,
                    "transfer_uuid": None,
                }
            )
        if fetch_children:
            try:
                to_add.extend(
                    _get_arrange_directory_tree(backlog_uuid, sourcepath, arrange_path)
                )
            except storage_service.ResourceNotFound as e:
                # The placeholder name must match the mapping key; the former
                # "%(messsage)s" raised KeyError instead of this ValueError.
                raise ValueError(
                    _("Storage Service failed with the message: %(message)s")
                    % {"message": str(e)}
                )
    else:
        # A destination ending in '/' is a directory to drop the file into;
        # otherwise it is used as the full target path.
        if destination.endswith("/"):
            arrange_path = os.path.join(destination, os.path.basename(sourcepath))
        else:
            arrange_path = destination
        relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, "", 1)
        try:
            file_info = storage_service.get_file_metadata(relative_path=relative_path)[
                0
            ]
        except storage_service.ResourceNotFound:
            raise ValueError(
                _(
                    "No file information returned from the Storage Service for file at relative_path: %(path)s"
                )
                % {"path": relative_path}
            )
        file_uuid = file_info.get("fileuuid")
        transfer_uuid = file_info.get("sipuuid")
        to_add.append(
            {
                "original_path": sourcepath,
                "arrange_path": arrange_path,
                "file_uuid": file_uuid,
                "transfer_uuid": transfer_uuid,
            }
        )

    logger.info("arrange_path: %s", arrange_path)
    logger.debug("files to be added: %s", to_add)

    return to_add
def copy_files_to_arrange(sourcepath,
                          destination,
                          fetch_children=False,
                          backlog_uuid=None):
    """
    Copy sourcepath from the backlog into destination by creating one
    SIPArrange row per object.

    :param sourcepath: Path of the file or directory to copy; leading '/'s
        are stripped.  A trailing '/' marks it as a directory.
    :param destination: Target path; must start with DEFAULT_ARRANGE_PATH.
    :param fetch_children: When true and sourcepath is a directory, also
        add its children via _get_arrange_directory_tree.
    :param backlog_uuid: UUID of the backlog location; looked up from the
        storage service when not given.
    :returns: None.
    :raises ValueError: If a parameter is blank, the destination is outside
        the arrange directory, a file is dropped on the top-level arrange
        folder, or the storage service reports ResourceNotFound.
    """
    sourcepath = sourcepath.lstrip(
        '/')  # starts with 'originals/', not '/originals/'

    # Lots of error checking:
    if not sourcepath or not destination:
        raise ValueError(
            _("GET parameter 'filepath' or 'destination' was blank."))
    if not destination.startswith(DEFAULT_ARRANGE_PATH):
        raise ValueError(
            _('%(path)s must be in arrange directory.') %
            {'path': destination})

    # Name of the second-to-last path component; absent when sourcepath
    # contains no '/'
    try:
        leaf_dir = sourcepath.split('/')[-2]
    except IndexError:
        leaf_dir = ''
    # Files cannot go into the top level folder,
    # and neither can the "objects" directory
    # NOTE(review): as written, the condition lets anything whose leaf_dir is
    # 'objects' through rather than rejecting it -- confirm intent.
    if destination == DEFAULT_ARRANGE_PATH and not (sourcepath.endswith('/')
                                                    or leaf_dir == 'objects'):
        raise ValueError(
            _('%(path1)s must go in a SIP, cannot be dropped onto %(path2)s')
            % {
                'path1': sourcepath,
                'path2': DEFAULT_ARRANGE_PATH
            })

    # Create new SIPArrange entry for each object being copied over
    if not backlog_uuid:
        backlog_uuid = storage_service.get_location(purpose='BL')[0]['uuid']
    to_add = []

    # Construct the base arrange_path differently for files vs folders
    if sourcepath.endswith('/'):
        # If dragging objects/ folder, actually move the contents of (not
        # the folder itself)
        if leaf_dir == 'objects':
            arrange_path = os.path.join(destination, '')
        else:
            # Strip UUID from transfer name
            uuid_regex = r'-[\w]{8}(-[\w]{4}){3}-[\w]{12}$'
            leaf_dir = re.sub(uuid_regex, '', leaf_dir)
            arrange_path = os.path.join(destination, leaf_dir) + '/'
            # Entry for the directory itself
            to_add.append({
                'original_path': None,
                'arrange_path': arrange_path,
                'file_uuid': None,
                'transfer_uuid': None,
            })
        if fetch_children:
            try:
                to_add.extend(
                    _get_arrange_directory_tree(backlog_uuid, sourcepath,
                                                arrange_path))
            except storage_service.ResourceNotFound as e:
                # The placeholder name must match the mapping key; the former
                # '%(messsage)s' raised KeyError instead of this ValueError.
                raise ValueError(
                    _('Storage Service failed with the message: %(message)s')
                    % {'message': str(e)})
    else:
        # A destination ending in '/' is a directory to drop the file into;
        # otherwise it is used as the full target path.
        if destination.endswith('/'):
            arrange_path = os.path.join(destination,
                                        os.path.basename(sourcepath))
        else:
            arrange_path = destination
        relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, '', 1)
        try:
            file_info = storage_service.get_file_metadata(
                relative_path=relative_path)[0]
        except storage_service.ResourceNotFound:
            raise ValueError(
                _('No file information returned from the Storage Service for file at relative_path: %(path)s'
                  ) % {'path': relative_path})
        file_uuid = file_info.get('fileuuid')
        transfer_uuid = file_info.get('sipuuid')
        to_add.append({
            'original_path': sourcepath,
            'arrange_path': arrange_path,
            'file_uuid': file_uuid,
            'transfer_uuid': transfer_uuid
        })

    logger.info('arrange_path: %s', arrange_path)
    logger.debug('files to be added: %s', to_add)

    for entry in to_add:
        try:
            # TODO enforce uniqueness on arrange panel?
            models.SIPArrange.objects.create(
                original_path=entry['original_path'],
                arrange_path=entry['arrange_path'],
                file_uuid=entry['file_uuid'],
                transfer_uuid=entry['transfer_uuid'],
            )
        except IntegrityError:
            # FIXME Expecting this to catch duplicate original_paths, which
            # we want to ignore since a file can only be in one SIP.  Needs
            # to be updated not to ignore other classes of IntegrityErrors.
            logger.exception('Integrity error inserting: %s', entry)