Esempio n. 1
0
def _get_arrange_directory_tree(backlog_uuid, original_path, arrange_path):
    """Fetch all the children of original_path from backlog_uuid and build an
    identical tree in arrange_path.

    Helper function for copy_to_arrange.

    :param backlog_uuid: UUID of the location to browse; passed as-is to
        storage_service.browse_location.
    :param original_path: Directory inside that location whose children are
        listed.
    :param arrange_path: Directory in the arrange tree that mirrors
        original_path.
    :returns: List of dicts with the keys "original_path", "arrange_path",
        "file_uuid" and "transfer_uuid". Entries describing a directory have
        every key except "arrange_path" set to None.
    :raises storage_service.ResourceNotFound: When the metadata lookup for a
        file fails; the failure is logged and re-raised.
    """
    # TODO Use ElasticSearch, since that's where we're getting the original info from now?  Could be easier to get file UUID that way
    ret = []
    browse = storage_service.browse_location(backlog_uuid, original_path)

    # Add everything that is not a directory (ie that is a file)
    entries = [e for e in browse["entries"] if e not in browse["directories"]]
    for entry in entries:
        # Exact comparison on purpose: a membership test against the bare
        # string would be a substring match and would also skip files whose
        # name merely occurs inside "processingMCP.xml" (e.g. "xml").
        if entry == "processingMCP.xml":
            continue
        path = os.path.join(original_path, entry)
        # Plain string (not a 1-tuple) so the lookup and the log message see
        # the same value the other callers of get_file_metadata pass.
        relative_path = path.replace(DEFAULT_BACKLOG_PATH, "", 1)
        try:
            file_info = storage_service.get_file_metadata(
                relative_path=relative_path
            )[0]
        except storage_service.ResourceNotFound:
            logger.warning(
                "No file information returned from the Storage Service for file at relative_path: %s",
                relative_path,
            )
            raise
        ret.append(
            {
                "original_path": path,
                "arrange_path": os.path.join(arrange_path, entry),
                "file_uuid": file_info["fileuuid"],
                "transfer_uuid": file_info["sipuuid"],
            }
        )

    # Add directories and recurse, adding their children too
    for directory in browse["directories"]:
        # Don't fetch metadata or logs dirs
        # TODO only filter if the children of a SIP ie /arrange/sipname/metadata
        if directory in ("metadata", "logs"):
            continue
        # Joining with "" guarantees a trailing separator on both paths.
        original_dir = os.path.join(original_path, directory, "")
        arrange_dir = os.path.join(arrange_path, directory, "")
        ret.append(
            {
                "original_path": None,
                "arrange_path": arrange_dir,
                "file_uuid": None,
                "transfer_uuid": None,
            }
        )
        ret.extend(
            _get_arrange_directory_tree(backlog_uuid, original_dir, arrange_dir)
        )

    return ret
Esempio n. 2
0
def _get_arrange_directory_tree(backlog_uuid, original_path, arrange_path):
    """ Fetches all the children of original_path from backlog_uuid and creates
    an identical tree in arrange_path.

    Helper function for copy_to_arrange.

    :param backlog_uuid: UUID of the location to browse; passed as-is to
        storage_service.browse_location.
    :param original_path: Directory inside that location whose children are
        listed.
    :param arrange_path: Directory in the arrange tree that mirrors
        original_path.
    :returns: List of dicts with the keys 'original_path', 'arrange_path',
        'file_uuid' and 'transfer_uuid'. Entries describing a directory have
        every key except 'arrange_path' set to None.
    :raises storage_service.ResourceNotFound: When the metadata lookup for a
        file fails; the failure is logged and re-raised.
    """
    # TODO Use ElasticSearch, since that's where we're getting the original info from now?  Could be easier to get file UUID that way
    ret = []
    browse = storage_service.browse_location(backlog_uuid, original_path)

    # Add everything that is not a directory (ie that is a file)
    entries = [e for e in browse['entries'] if e not in browse['directories']]
    for entry in entries:
        # Exact comparison on purpose: a membership test against the bare
        # string would be a substring match and would also skip files whose
        # name merely occurs inside 'processingMCP.xml' (e.g. 'xml').
        if entry == 'processingMCP.xml':
            continue
        path = os.path.join(original_path, entry)
        relative_path = path.replace(DEFAULT_BACKLOG_PATH, '', 1)
        try:
            file_info = storage_service.get_file_metadata(
                relative_path=relative_path)[0]
        except storage_service.ResourceNotFound:
            logger.warning(
                'No file information returned from the Storage Service for file at relative_path: %s',
                relative_path)
            raise
        ret.append({
            'original_path': path,
            'arrange_path': os.path.join(arrange_path, entry),
            'file_uuid': file_info['fileuuid'],
            'transfer_uuid': file_info['sipuuid']
        })

    # Add directories and recurse, adding their children too
    for directory in browse['directories']:
        # Don't fetch metadata or logs dirs
        # TODO only filter if the children of a SIP ie /arrange/sipname/metadata
        if directory in ('metadata', 'logs'):
            continue
        # Joining with '' guarantees a trailing separator on both paths.
        original_dir = os.path.join(original_path, directory, '')
        arrange_dir = os.path.join(arrange_path, directory, '')
        ret.append({
            'original_path': None,
            'arrange_path': arrange_dir,
            'file_uuid': None,
            'transfer_uuid': None
        })
        ret.extend(
            _get_arrange_directory_tree(backlog_uuid, original_dir,
                                        arrange_dir))

    return ret
Esempio n. 3
0
def copy_to_arrange(request):
    """ Add files from backlog to in-progress SIPs being arranged.

    Both inputs are read from request.POST and are base64-encoded:

    filepath: path relative to this pipeline's backlog. Leading '/'s are
        stripped. A trailing '/' marks a directory, which is copied together
        with its children.
    destination: path within arrange folder, should start with
        DEFAULT_ARRANGE_PATH ('/arrange/')

    Returns helpers.json_response() of a dict holding a 'message'; when
    validation fails the dict also carries 'error': True and nothing is
    written to the database.

    storage_service.ResourceNotFound propagates when the Storage Service has
    no metadata for a file being copied.
    """
    error = None
    sourcepath = base64.b64decode(request.POST.get('filepath', '')).lstrip('/')
    destination = base64.b64decode(request.POST.get('destination', ''))
    logging.info('copy_to_arrange: sourcepath: %s', sourcepath)
    logging.info('copy_to_arrange: destination: %s', destination)

    # Lots of error checking. The checks are chained so that the first
    # failure is the one reported instead of being overwritten by a later,
    # less specific message.
    if not sourcepath or not destination:
        error = "GET parameter 'filepath' or 'destination' was blank."
    elif not destination.startswith(DEFAULT_ARRANGE_PATH):
        error = '{} must be in arrange directory.'.format(destination)
    else:
        # If drop onto a file, drop it into its parent directory instead
        if not destination.endswith('/'):
            destination = os.path.dirname(destination)
        # Files cannot go into the top level folder
        if destination == DEFAULT_ARRANGE_PATH and not sourcepath.endswith('/'):
            error = '{} must go in a SIP, cannot be dropped onto {}'.format(
                sourcepath, DEFAULT_ARRANGE_PATH)

    # Create new SIPArrange entry for each object being copied over
    if not error:
        # IDEA memoize the backlog location?
        backlog_uuid = storage_service.get_location(purpose='BL')[0]['uuid']
        to_add = []

        # Construct the base arrange_path differently for files vs folders
        if sourcepath.endswith('/'):
            # With a trailing '/', the last split element is '' and the one
            # before it is the name of the directory being dragged.
            leaf_dir = sourcepath.split('/')[-2]
            # If dragging objects/ folder, actually move the contents of (not
            # the folder itself)
            if leaf_dir == 'objects':
                arrange_path = os.path.join(destination, '')
            else:
                # Strip UUID from transfer name
                uuid_regex = r'-[\w]{8}(-[\w]{4}){3}-[\w]{12}$'
                leaf_dir = re.sub(uuid_regex, '', leaf_dir)
                arrange_path = os.path.join(destination, leaf_dir) + '/'
                # Directory entries only carry an arrange_path.
                to_add.append({
                    'original_path': None,
                    'arrange_path': arrange_path,
                    'file_uuid': None,
                    'transfer_uuid': None,
                })
            to_add.extend(_get_arrange_directory_tree(backlog_uuid, sourcepath, arrange_path))
        else:
            arrange_path = os.path.join(destination, os.path.basename(sourcepath))
            relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, '', 1)
            try:
                file_info = storage_service.get_file_metadata(relative_path=relative_path)[0]
            except storage_service.ResourceNotFound:
                logging.warning('No file information returned from the Storage Service for file at relative_path: %s', relative_path)
                raise
            to_add.append({
                'original_path': sourcepath,
                'arrange_path': arrange_path,
                'file_uuid': file_info.get('fileuuid'),
                'transfer_uuid': file_info.get('sipuuid'),
            })

        logging.info('copy_to_arrange: arrange_path: %s', arrange_path)
        logging.debug('copy_to_arrange: files to be added: %s', to_add)

        # Insert each file into the DB
        for entry in to_add:
            try:
                # TODO enforce uniqueness on arrange panel?
                models.SIPArrange.objects.create(
                    original_path=entry['original_path'],
                    arrange_path=entry['arrange_path'],
                    file_uuid=entry['file_uuid'],
                    transfer_uuid=entry['transfer_uuid'],
                )
            except IntegrityError:
                # FIXME Expecting this to catch duplicate original_paths, which
                # we want to ignore since a file can only be in one SIP.  Needs
                # to be updated not to ignore other classes of IntegrityErrors.
                logging.exception('Integrity error inserting: %s', entry)

    if error is not None:
        response = {
            'message': error,
            'error': True,
        }
    else:
        response = {'message': 'Files added to the SIP.'}

    return helpers.json_response(response)
Esempio n. 4
0
def _copy_files_to_arrange(
    sourcepath, destination, fetch_children=False, backlog_uuid=None
):
    """Build the list of arrange entries needed to copy sourcepath into
    destination.

    Nothing is written by this function; it only returns the entries.

    :param sourcepath: Path of the file or directory to copy. Leading "/"s
        are stripped; a trailing "/" marks a directory.
    :param destination: Target path; must start with DEFAULT_ARRANGE_PATH.
    :param fetch_children: When true and sourcepath is a directory, its
        children are added as well via _get_arrange_directory_tree.
    :param backlog_uuid: UUID of the backlog location. Looked up through
        storage_service.get_location(purpose="BL") when not given.
    :returns: List of dicts with the keys "original_path", "arrange_path",
        "file_uuid" and "transfer_uuid".
    :raises ValueError: On invalid arguments, or when the Storage Service
        raises ResourceNotFound.
    """
    sourcepath = sourcepath.lstrip("/")  # starts with 'originals/', not '/originals/'

    # Lots of error checking:
    if not sourcepath or not destination:
        raise ValueError(_("GET parameter 'filepath' or 'destination' was blank."))
    if not destination.startswith(DEFAULT_ARRANGE_PATH):
        raise ValueError(
            _("%(path)s must be in arrange directory.") % {"path": destination}
        )

    # Second-to-last path component: the directory's own name when sourcepath
    # ends with "/", otherwise the name of the file's parent directory.
    try:
        leaf_dir = sourcepath.split("/")[-2]
    except IndexError:
        leaf_dir = ""
    # Files cannot go into the top level folder,
    # and neither can the "objects" directory
    # NOTE(review): as written, this condition lets through both directories
    # and files whose parent is "objects"; confirm that matches the comment
    # above.
    if destination == DEFAULT_ARRANGE_PATH and not (
        sourcepath.endswith("/") or leaf_dir == "objects"
    ):
        raise ValueError(
            _("%(path1)s must go in a SIP, cannot be dropped onto %(path2)s")
            % {"path1": sourcepath, "path2": DEFAULT_ARRANGE_PATH}
        )

    # Create new SIPArrange entry for each object being copied over
    if not backlog_uuid:
        backlog_uuid = storage_service.get_location(purpose="BL")[0]["uuid"]
    to_add = []

    # Construct the base arrange_path differently for files vs folders
    if sourcepath.endswith("/"):
        # If dragging objects/ folder, actually move the contents of (not
        # the folder itself)
        if leaf_dir == "objects":
            arrange_path = os.path.join(destination, "")
        else:
            # Strip UUID from transfer name
            uuid_regex = r"-[\w]{8}(-[\w]{4}){3}-[\w]{12}$"
            leaf_dir = re.sub(uuid_regex, "", leaf_dir)
            arrange_path = os.path.join(destination, leaf_dir) + "/"
            # Directory entries only carry an arrange_path.
            to_add.append(
                {
                    "original_path": None,
                    "arrange_path": arrange_path,
                    "file_uuid": None,
                    "transfer_uuid": None,
                }
            )
        if fetch_children:
            try:
                to_add.extend(
                    _get_arrange_directory_tree(backlog_uuid, sourcepath, arrange_path)
                )
            except storage_service.ResourceNotFound as e:
                # The placeholder name must match the mapping key; a mismatch
                # raises KeyError here instead of the intended ValueError.
                raise ValueError(
                    _("Storage Service failed with the message: %(message)s")
                    % {"message": str(e)}
                )
    else:
        # A destination ending in "/" is a directory to drop the file into;
        # anything else is taken as the full target path.
        if destination.endswith("/"):
            arrange_path = os.path.join(destination, os.path.basename(sourcepath))
        else:
            arrange_path = destination
        relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, "", 1)
        try:
            file_info = storage_service.get_file_metadata(relative_path=relative_path)[
                0
            ]
        except storage_service.ResourceNotFound:
            raise ValueError(
                _(
                    "No file information returned from the Storage Service for file at relative_path: %(path)s"
                )
                % {"path": relative_path}
            )
        to_add.append(
            {
                "original_path": sourcepath,
                "arrange_path": arrange_path,
                "file_uuid": file_info.get("fileuuid"),
                "transfer_uuid": file_info.get("sipuuid"),
            }
        )

    logger.info("arrange_path: %s", arrange_path)
    logger.debug("files to be added: %s", to_add)

    return to_add
Esempio n. 5
0
def copy_files_to_arrange(sourcepath,
                          destination,
                          fetch_children=False,
                          backlog_uuid=None):
    """Create SIPArrange rows that copy sourcepath into destination.

    :param sourcepath: Path of the file or directory to copy. Leading '/'s
        are stripped; a trailing '/' marks a directory.
    :param destination: Target path; must start with DEFAULT_ARRANGE_PATH.
    :param fetch_children: When true and sourcepath is a directory, its
        children are added as well via _get_arrange_directory_tree.
    :param backlog_uuid: UUID of the backlog location. Looked up through
        storage_service.get_location(purpose='BL') when not given.
    :returns: None. Rows that fail with IntegrityError are logged and
        skipped.
    :raises ValueError: On invalid arguments, or when the Storage Service
        raises ResourceNotFound.
    """
    sourcepath = sourcepath.lstrip(
        '/')  # starts with 'originals/', not '/originals/'

    # Lots of error checking:
    if not sourcepath or not destination:
        raise ValueError(
            _("GET parameter 'filepath' or 'destination' was blank."))
    if not destination.startswith(DEFAULT_ARRANGE_PATH):
        raise ValueError(
            _('%(path)s must be in arrange directory.') %
            {'path': destination})

    # Second-to-last path component: the directory's own name when sourcepath
    # ends with '/', otherwise the name of the file's parent directory.
    try:
        leaf_dir = sourcepath.split('/')[-2]
    except IndexError:
        leaf_dir = ''
    # Files cannot go into the top level folder,
    # and neither can the "objects" directory
    # NOTE(review): as written, this condition lets through both directories
    # and files whose parent is 'objects'; confirm that matches the comment
    # above.
    if destination == DEFAULT_ARRANGE_PATH and not (sourcepath.endswith('/')
                                                    or leaf_dir == 'objects'):
        raise ValueError(
            _('%(path1)s must go in a SIP, cannot be dropped onto %(path2)s') %
            {
                'path1': sourcepath,
                'path2': DEFAULT_ARRANGE_PATH
            })

    # Create new SIPArrange entry for each object being copied over
    if not backlog_uuid:
        backlog_uuid = storage_service.get_location(purpose='BL')[0]['uuid']
    to_add = []

    # Construct the base arrange_path differently for files vs folders
    if sourcepath.endswith('/'):
        # If dragging objects/ folder, actually move the contents of (not
        # the folder itself)
        if leaf_dir == 'objects':
            arrange_path = os.path.join(destination, '')
        else:
            # Strip UUID from transfer name
            uuid_regex = r'-[\w]{8}(-[\w]{4}){3}-[\w]{12}$'
            leaf_dir = re.sub(uuid_regex, '', leaf_dir)
            arrange_path = os.path.join(destination, leaf_dir) + '/'
            # Directory entries only carry an arrange_path.
            to_add.append({
                'original_path': None,
                'arrange_path': arrange_path,
                'file_uuid': None,
                'transfer_uuid': None,
            })
        if fetch_children:
            try:
                to_add.extend(
                    _get_arrange_directory_tree(backlog_uuid, sourcepath,
                                                arrange_path))
            except storage_service.ResourceNotFound as e:
                # The placeholder name must match the mapping key; a mismatch
                # raises KeyError here instead of the intended ValueError.
                raise ValueError(
                    _('Storage Service failed with the message: %(message)s')
                    % {'message': str(e)})
    else:
        # A destination ending in '/' is a directory to drop the file into;
        # anything else is taken as the full target path.
        if destination.endswith('/'):
            arrange_path = os.path.join(destination,
                                        os.path.basename(sourcepath))
        else:
            arrange_path = destination
        relative_path = sourcepath.replace(DEFAULT_BACKLOG_PATH, '', 1)
        try:
            file_info = storage_service.get_file_metadata(
                relative_path=relative_path)[0]
        except storage_service.ResourceNotFound:
            raise ValueError(
                _('No file information returned from the Storage Service for file at relative_path: %(path)s'
                  ) % {'path': relative_path})
        to_add.append({
            'original_path': sourcepath,
            'arrange_path': arrange_path,
            'file_uuid': file_info.get('fileuuid'),
            'transfer_uuid': file_info.get('sipuuid')
        })

    logger.info('arrange_path: %s', arrange_path)
    logger.debug('files to be added: %s', to_add)

    # Insert each file into the DB
    for entry in to_add:
        try:
            # TODO enforce uniqueness on arrange panel?
            models.SIPArrange.objects.create(
                original_path=entry['original_path'],
                arrange_path=entry['arrange_path'],
                file_uuid=entry['file_uuid'],
                transfer_uuid=entry['transfer_uuid'],
            )
        except IntegrityError:
            # FIXME Expecting this to catch duplicate original_paths, which
            # we want to ignore since a file can only be in one SIP.  Needs
            # to be updated not to ignore other classes of IntegrityErrors.
            logger.exception('Integrity error inserting: %s', entry)