Example #1
 def __stream_dataset_collection(self, trans, dataset_collection_instance):
     archive_type_string = 'w|gz'
     archive_ext = 'tgz'
     if self.app.config.upstream_gzip:
         archive_type_string = 'w|'
         archive_ext = 'tar'
     archive = StreamBall(mode=archive_type_string)
     names, hdas = get_hda_and_element_identifiers(dataset_collection_instance)
     for name, hda in zip(names, hdas):
         if hda.state != hda.states.OK:
             continue
         for file_path, relpath in hda.datatype.to_archive(trans=trans, dataset=hda, name=name):
             archive.add(file=file_path, relpath=relpath)
     archive_name = "%s: %s.%s" % (dataset_collection_instance.hid, dataset_collection_instance.name, archive_ext)
     trans.response.set_content_type("application/x-tar")
     trans.response.headers["Content-Disposition"] = 'attachment; filename="{}"'.format(archive_name)
     archive.wsgi_status = trans.response.wsgi_status()
     archive.wsgi_headeritems = trans.response.wsgi_headeritems()
     return archive.stream
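
StreamBall here is Galaxy's streaming-tar helper (galaxy.util.streamball). The 'w|gz' and 'w|' modes it receives are tarfile pipe modes, which write strictly sequentially and never seek, and that is what lets the handler return archive.stream instead of materializing the whole file first. Below is a minimal standard-library sketch of the same idea; MiniStreamBall is a hypothetical stand-in, not the real class:

import io
import tarfile

class MiniStreamBall:
    # Hypothetical stand-in for galaxy.util.streamball.StreamBall: queue
    # (path, arcname) pairs, then emit one non-seeking tar stream.
    def __init__(self, mode='w|gz'):
        self.mode = mode  # 'w|gz' gzips in-process, 'w|' emits plain tar
        self.members = []

    def add(self, file, relpath):
        self.members.append((file, relpath))

    def stream(self, fileobj):
        # Pipe modes ('w|', 'w|gz', 'w|bz2') only ever append, so fileobj
        # can be a WSGI response or socket rather than a seekable file.
        with tarfile.open(mode=self.mode, fileobj=fileobj) as tar:
            for path, relpath in self.members:
                tar.add(path, arcname=relpath)

ball = MiniStreamBall(mode='w|gz')
ball.add(__file__, 'example/self.py')  # any readable file will do
buf = io.BytesIO()
ball.stream(buf)
print(len(buf.getvalue()), 'bytes of tgz')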
Example #2
 def __stream_dataset_collection(self, trans, dataset_collection_instance):
     archive_type_string = 'w|gz'
     archive_ext = 'tgz'
     if self.app.config.upstream_gzip:
         archive_type_string = 'w|'
         archive_ext = 'tar'
     archive = StreamBall(mode=archive_type_string)
     names, hdas = get_hda_and_element_identifiers(
         dataset_collection_instance)
     for name, hda in zip(names, hdas):
         if hda.state != hda.states.OK:
             continue
         for file_path, relpath in hda.datatype.to_archive(trans=trans,
                                                           dataset=hda,
                                                           name=name):
             archive.add(file=file_path, relpath=relpath)
     archive_name = "%s: %s.%s" % (dataset_collection_instance.hid,
                                   dataset_collection_instance.name,
                                   archive_ext)
     trans.response.set_content_type("application/x-tar")
     trans.response.headers[
         "Content-Disposition"] = 'attachment; filename="{}"'.format(
             archive_name)
     archive.wsgi_status = trans.response.wsgi_status()
     archive.wsgi_headeritems = trans.response.wsgi_headeritems()
     return archive.stream
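
Both variants open with the same decision: when a front-end proxy is configured to gzip responses (upstream_gzip), stream an uncompressed tar so the payload is not compressed twice. Condensed into a hypothetical helper:

def choose_tar_mode(upstream_gzip):
    # Plain tar when the proxy will gzip the response anyway; otherwise
    # compress in-process and name the file accordingly.
    return ('w|', 'tar') if upstream_gzip else ('w|gz', 'tgz')

assert choose_tar_mode(True) == ('w|', 'tar')
assert choose_tar_mode(False) == ('w|gz', 'tgz')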
Example #3
 trantab = string.maketrans( killme, '_'*len( killme ) )
 try:
     outext = 'zip'
     if format == 'zip':
         # Can't use mkstemp - the file must not exist first
         tmpd = tempfile.mkdtemp()
         util.umask_fix_perms( tmpd, trans.app.config.umask, 0777, self.app.config.gid )
         tmpf = os.path.join( tmpd, 'library_download.' + format )
         if trans.app.config.upstream_gzip:
             archive = zipfile.ZipFile( tmpf, 'w', zipfile.ZIP_STORED, True )
         else:
             archive = zipfile.ZipFile( tmpf, 'w', zipfile.ZIP_DEFLATED, True )
         archive.add = lambda x, y: archive.write( x, y.encode( 'CP437' ) )
     elif format == 'tgz':
         if trans.app.config.upstream_gzip:
             archive = StreamBall( 'w|' )
             outext = 'tar'
         else:
             archive = StreamBall( 'w|gz' )
             outext = 'tgz'
     elif format == 'tbz':
         archive = StreamBall( 'w|bz2' )
         outext = 'tbz2'
 except ( OSError, zipfile.BadZipfile ):
     log.exception( "Unable to create archive for download" )
     raise exceptions.InternalServerError( "Unable to create archive for download." )
 except Exception:
     log.exception( "Unexpected error %s in create archive for download" % sys.exc_info()[ 0 ] )
     raise exceptions.InternalServerError( "Unable to create archive for download." )
 composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
 seen = []
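
The trantab built at the top replaces every punctuation and whitespace character with '_' before extra filenames go into the archive. Note that string.maketrans() is Python 2 only; assuming the same intent on Python 3, the spelling is the str.maketrans static method:

import string

killme = string.punctuation + string.whitespace
trantab = str.maketrans(killme, '_' * len(killme))  # Python 3 spelling
assert 'my data (v2).txt'.translate(trantab) == 'my_data__v2__txt'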
Example #4
         # Can't use mkstemp - the file must not exist first
         tmpd = tempfile.mkdtemp()
         util.umask_fix_perms(tmpd, trans.app.config.umask, 0777,
                              self.app.config.gid)
         tmpf = os.path.join(tmpd, 'library_download.' + format)
         if trans.app.config.upstream_gzip:
             archive = zipfile.ZipFile(tmpf, 'w',
                                       zipfile.ZIP_STORED, True)
         else:
             archive = zipfile.ZipFile(tmpf, 'w',
                                       zipfile.ZIP_DEFLATED, True)
         archive.add = lambda x, y: archive.write(
             x, y.encode('CP437'))
     elif format == 'tgz':
         if trans.app.config.upstream_gzip:
             archive = StreamBall('w|')
             outext = 'tar'
         else:
             archive = StreamBall('w|gz')
             outext = 'tgz'
     elif format == 'tbz':
         archive = StreamBall('w|bz2')
         outext = 'tbz2'
 except (OSError, zipfile.BadZipfile):
     log.exception("Unable to create archive for download")
     raise exceptions.InternalServerError(
         "Unable to create archive for download.")
 except Exception:
     log.exception(
         "Unexpected error %s in create archive for download" %
         sys.exc_info()[0])
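
Unlike the tar formats, the zip branch cannot stream: zipfile needs a real, seekable file, so the code writes into a fresh mkdtemp() directory (the target file must not pre-exist) and re-binds archive.add so the zip and tar branches share one add(path, arcname) call signature. A standalone sketch of that branch without the Galaxy config plumbing; the .encode('CP437') in the original targets Python 2's zipfile, while on Python 3 arcnames are str and zipfile picks the name encoding itself:

import os
import tempfile
import zipfile

tmpd = tempfile.mkdtemp()  # zip needs a file path that does not exist yet
tmpf = os.path.join(tmpd, 'library_download.zip')
archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)
# Same trick as above: give ZipFile the add(path, arcname) signature that
# StreamBall exposes, so the rest of the handler is format-agnostic.
archive.add = lambda x, y: archive.write(x, y)

src = os.path.join(tmpd, 'data.txt')
with open(src, 'w') as fh:
    fh.write('hello')
archive.add(src, 'data.txt')
archive.close()
print(tmpf)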
Example #5
    def download(self, trans, format, **kwd):
        """
        Download requested datasets (identified by encoded IDs) in requested format.

        * GET /api/libraries/datasets/download/{format}
        * POST /api/libraries/datasets/download/{format}

        example: ``GET localhost:8080/api/libraries/datasets/download/tbz?ld_ids%255B%255D=a0d84b45643a2678&ld_ids%255B%255D=fe38c84dcd46c828``

        .. note:: supported format values are: 'zip', 'tgz', 'tbz', 'uncompressed'

        :param  format:      string representing requested archive format
        :type   format:      string
        :param  ld_ids[]:      an array of encoded dataset ids
        :type   ld_ids[]:      an array
        :param  folder_ids[]:      an array of encoded folder ids
        :type   folder_ids[]:      an array

        :returns: either archive with the requested datasets packed inside or a single uncompressed dataset
        :rtype:   file

        :raises: MessageException, ItemDeletionException, ItemAccessibilityException, HTTPBadRequest, OSError, IOError, ObjectNotFound
        """
        library_datasets = []
        datasets_to_download = kwd.get('ld_ids%5B%5D', None)
        if datasets_to_download is None:
            datasets_to_download = kwd.get('ld_ids', None)
        if datasets_to_download is not None:
            datasets_to_download = util.listify(datasets_to_download)
            for dataset_id in datasets_to_download:
                try:
                    library_dataset = self.get_library_dataset(trans, id=dataset_id, check_ownership=False, check_accessible=True)
                    library_datasets.append(library_dataset)
                except HTTPBadRequest:
                    raise exceptions.RequestParameterInvalidException('Bad Request.')
                except HTTPInternalServerError:
                    raise exceptions.InternalServerError('Internal error.')
                except Exception as e:
                    raise exceptions.InternalServerError('Unknown error.' + str(e))

        folders_to_download = kwd.get('folder_ids%5B%5D', None)
        if folders_to_download is None:
            folders_to_download = kwd.get('folder_ids', None)
        if folders_to_download is not None:
            folders_to_download = util.listify(folders_to_download)

            current_user_roles = trans.get_current_user_roles()

            def traverse(folder):
                admin = trans.user_is_admin()
                rval = []
                for subfolder in folder.active_folders:
                    if not admin:
                        can_access, folder_ids = trans.app.security_agent.check_folder_contents(trans.user, current_user_roles, subfolder)
                    if (admin or can_access) and not subfolder.deleted:
                        rval.extend(traverse(subfolder))
                for ld in folder.datasets:
                    if not admin:
                        can_access = trans.app.security_agent.can_access_dataset(
                            current_user_roles,
                            ld.library_dataset_dataset_association.dataset
                        )
                    if (admin or can_access) and not ld.deleted:
                        rval.append(ld)
                return rval

            for encoded_folder_id in folders_to_download:
                folder_id = self.folder_manager.cut_and_decode(trans, encoded_folder_id)
                folder = self.folder_manager.get(trans, folder_id)
                library_datasets.extend(traverse(folder))

        if not library_datasets:
            raise exceptions.RequestParameterMissingException('Request has to contain a list of dataset ids or folder ids to download.')

        if format in ['zip', 'tgz', 'tbz']:
            # error = False
            killme = string.punctuation + string.whitespace
            trantab = string.maketrans(killme, '_' * len(killme))
            try:
                outext = 'zip'
                if format == 'zip':
                    # Can't use mkstemp - the file must not exist first
                    tmpd = tempfile.mkdtemp()
                    util.umask_fix_perms(tmpd, trans.app.config.umask, 0777, self.app.config.gid)
                    tmpf = os.path.join(tmpd, 'library_download.' + format)
                    if trans.app.config.upstream_gzip:
                        archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_STORED, True)
                    else:
                        archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED, True)
                    archive.add = lambda x, y: archive.write(x, y.encode('CP437'))
                elif format == 'tgz':
                    if trans.app.config.upstream_gzip:
                        archive = StreamBall('w|')
                        outext = 'tar'
                    else:
                        archive = StreamBall('w|gz')
                        outext = 'tgz'
                elif format == 'tbz':
                    archive = StreamBall('w|bz2')
                    outext = 'tbz2'
            except (OSError, zipfile.BadZipfile):
                log.exception("Unable to create archive for download")
                raise exceptions.InternalServerError("Unable to create archive for download.")
            except Exception:
                log.exception("Unexpected error in create archive for download")
                raise exceptions.InternalServerError("Unable to create archive for download.")
            composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
            seen = []
            for ld in library_datasets:
                ldda = ld.library_dataset_dataset_association
                ext = ldda.extension
                is_composite = ext in composite_extensions
                path = ""
                parent_folder = ldda.library_dataset.folder
                while parent_folder is not None:
                    # Exclude the now-hidden "root folder"
                    if parent_folder.parent is None:
                        path = os.path.join(parent_folder.library_root[0].name, path)
                        break
                    path = os.path.join(parent_folder.name, path)
                    parent_folder = parent_folder.parent
                path += ldda.name
                while path in seen:
                    path += '_'
                seen.append(path)
                zpath = os.path.split(path)[-1]  # comes as base_name/fname
                outfname, zpathext = os.path.splitext(zpath)

                if is_composite:
                    # need to add all the components from the extra_files_path to the zip
                    if zpathext == '':
                        zpath = '%s.html' % zpath  # fake the real nature of the html file
                    try:
                        if format == 'zip':
                            archive.add(ldda.dataset.file_name, zpath)  # add the primary of a composite set
                        else:
                            archive.add(ldda.dataset.file_name, zpath, check_file=True)  # add the primary of a composite set
                    except IOError:
                        log.exception("Unable to add composite parent %s to temporary library download archive", ldda.dataset.file_name)
                        raise exceptions.InternalServerError("Unable to create archive for download.")
                    except ObjectNotFound:
                        log.exception("Requested dataset %s does not exist on the host.", ldda.dataset.file_name)
                        raise exceptions.ObjectNotFound("Requested dataset not found. ")
                    except Exception as e:
                        log.exception("Unable to add composite parent %s to temporary library download archive", ldda.dataset.file_name)
                        raise exceptions.InternalServerError("Unable to add composite parent to temporary library download archive. " + str(e))

                    flist = glob.glob(os.path.join(ldda.dataset.extra_files_path, '*.*'))  # glob returns full paths
                    for fpath in flist:
                        efp, fname = os.path.split(fpath)
                        if fname > '':
                            fname = fname.translate(trantab)
                        try:
                            if format == 'zip':
                                archive.add(fpath, fname)
                            else:
                                archive.add(fpath, fname, check_file=True)
                        except IOError:
                            log.exception("Unable to add %s to temporary library download archive %s", fname, outfname)
                            raise exceptions.InternalServerError("Unable to create archive for download.")
                        except ObjectNotFound:
                            log.exception("Requested dataset %s does not exist on the host.", fpath)
                            raise exceptions.ObjectNotFound("Requested dataset not found.")
                        except Exception as e:
                            log.exception("Unable to add %s to temporary library download archive %s" % (fname, outfname))
                            raise exceptions.InternalServerError("Unable to add dataset to temporary library download archive . " + str(e))
                else:
                    try:
                        if format == 'zip':
                            archive.add(ldda.dataset.file_name, path)
                        else:
                            archive.add(ldda.dataset.file_name, path, check_file=True)
                    except IOError:
                        log.exception("Unable to write %s to temporary library download archive", ldda.dataset.file_name)
                        raise exceptions.InternalServerError("Unable to create archive for download")
                    except ObjectNotFound:
                        log.exception("Requested dataset %s does not exist on the host.", ldda.dataset.file_name)
                        raise exceptions.ObjectNotFound("Requested dataset not found.")
                    except Exception as e:
                        log.exception("Unable to add %s to temporary library download archive %s", fname, outfname)
                        raise exceptions.InternalServerError("Unknown error. " + str(e))
            lname = 'selected_dataset'
            fname = lname.replace(' ', '_') + '_files'
            if format == 'zip':
                archive.close()
                trans.response.set_content_type("application/octet-stream")
                trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (fname, outext)
                archive = util.streamball.ZipBall(tmpf, tmpd)
                archive.wsgi_status = trans.response.wsgi_status()
                archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                return archive.stream
            else:
                trans.response.set_content_type("application/x-tar")
                trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (fname, outext)
                archive.wsgi_status = trans.response.wsgi_status()
                archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                return archive.stream
        elif format == 'uncompressed':
            if len(library_datasets) != 1:
                raise exceptions.RequestParameterInvalidException("You can download only one uncompressed file at once.")
            else:
                single_ld = library_datasets[0]
                ldda = single_ld.library_dataset_dataset_association
                dataset = ldda.dataset
                fStat = os.stat(dataset.file_name)
                trans.response.set_content_type(ldda.get_mime())
                trans.response.headers['Content-Length'] = int(fStat.st_size)
                fname = ldda.name
                fname = ''.join(c in util.FILENAME_VALID_CHARS and c or '_' for c in fname)[0:150]
                trans.response.headers["Content-Disposition"] = 'attachment; filename="%s"' % fname
                try:
                    return open(dataset.file_name, 'rb')
                except Exception:
                    raise exceptions.InternalServerError("This dataset contains no content.")
        else:
            raise exceptions.RequestParameterInvalidException("Wrong format parameter specified")
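
Seen from the client side, the endpoint takes repeated ld_ids (or folder_ids) query parameters and streams the archive back. A hypothetical call with requests; the host, API key, and encoded ids are placeholders, and 'key' assumes Galaxy's API-key authentication:

import requests

resp = requests.get(
    'http://localhost:8080/api/libraries/datasets/download/tgz',
    params={'ld_ids': ['a0d84b45643a2678', 'fe38c84dcd46c828'],
            'key': 'YOUR_API_KEY'},  # placeholders, not real credentials
    stream=True)
resp.raise_for_status()
with open('selected_dataset_files.tgz', 'wb') as out:
    for chunk in resp.iter_content(chunk_size=8192):
        out.write(chunk)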
Example #6
    def download(self, trans, format, **kwd):
        """
        GET /api/libraries/datasets/download/{format}
        POST /api/libraries/datasets/download/{format}

        Download requested datasets (identified by encoded IDs) in requested format.

        example: ``GET localhost:8080/api/libraries/datasets/download/tbz?ld_ids%255B%255D=a0d84b45643a2678&ld_ids%255B%255D=fe38c84dcd46c828``

        .. note:: supported format values are: 'zip', 'tgz', 'tbz', 'uncompressed'

        :param  format:      string representing requested archive format
        :type   format:      string
        :param  ld_ids[]:      an array of encoded dataset ids
        :type   ld_ids[]:      an array
        :param  folder_ids[]:      an array of encoded folder ids
        :type   folder_ids[]:      an array

        :returns: either archive with the requested datasets packed inside or a single uncompressed dataset
        :rtype:   file

        :raises: MessageException, ItemDeletionException, ItemAccessibilityException, HTTPBadRequest, OSError, IOError, ObjectNotFound
        """
        library_datasets = []
        datasets_to_download = kwd.get('ld_ids%5B%5D', None)
        if datasets_to_download is None:
            datasets_to_download = kwd.get('ld_ids', None)
        if datasets_to_download is not None:
            datasets_to_download = util.listify(datasets_to_download)
            for dataset_id in datasets_to_download:
                try:
                    library_dataset = self.get_library_dataset(
                        trans,
                        id=dataset_id,
                        check_ownership=False,
                        check_accessible=True)
                    library_datasets.append(library_dataset)
                except HTTPBadRequest:
                    raise exceptions.RequestParameterInvalidException(
                        'Bad Request.')
                except HTTPInternalServerError:
                    raise exceptions.InternalServerError('Internal error.')
                except Exception as e:
                    raise exceptions.InternalServerError('Unknown error.' +
                                                         util.unicodify(e))

        folders_to_download = kwd.get('folder_ids%5B%5D', None)
        if folders_to_download is None:
            folders_to_download = kwd.get('folder_ids', None)
        if folders_to_download is not None:
            folders_to_download = util.listify(folders_to_download)

            current_user_roles = trans.get_current_user_roles()

            def traverse(folder):
                admin = trans.user_is_admin
                rval = []
                for subfolder in folder.active_folders:
                    if not admin:
                        can_access, folder_ids = trans.app.security_agent.check_folder_contents(
                            trans.user, current_user_roles, subfolder)
                    if (admin or can_access) and not subfolder.deleted:
                        rval.extend(traverse(subfolder))
                for ld in folder.datasets:
                    if not admin:
                        can_access = trans.app.security_agent.can_access_dataset(
                            current_user_roles,
                            ld.library_dataset_dataset_association.dataset)
                    if (admin or can_access) and not ld.deleted:
                        rval.append(ld)
                return rval

            for encoded_folder_id in folders_to_download:
                folder_id = self.folder_manager.cut_and_decode(
                    trans, encoded_folder_id)
                folder = self.folder_manager.get(trans, folder_id)
                library_datasets.extend(traverse(folder))

        if not library_datasets:
            raise exceptions.RequestParameterMissingException(
                'Request has to contain a list of dataset ids or folder ids to download.'
            )

        if format in ['zip', 'tgz', 'tbz']:
            # error = False
            killme = string.punctuation + string.whitespace
            trantab = str.maketrans(killme, '_' * len(killme))
            try:
                outext = 'zip'
                if format == 'zip':
                    # Can't use mkstemp - the file must not exist first
                    tmpd = tempfile.mkdtemp()
                    util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777,
                                         self.app.config.gid)
                    tmpf = os.path.join(tmpd, 'library_download.' + format)
                    if trans.app.config.upstream_gzip:
                        archive = zipfile.ZipFile(tmpf, 'w',
                                                  zipfile.ZIP_STORED, True)
                    else:
                        archive = zipfile.ZipFile(tmpf, 'w',
                                                  zipfile.ZIP_DEFLATED, True)
                    archive.add = lambda x, y: archive.write(
                        x, y.encode('CP437'))
                elif format == 'tgz':
                    if trans.app.config.upstream_gzip:
                        archive = StreamBall('w|')
                        outext = 'tar'
                    else:
                        archive = StreamBall('w|gz')
                        outext = 'tgz'
                elif format == 'tbz':
                    archive = StreamBall('w|bz2')
                    outext = 'tbz2'
            except (OSError, zipfile.BadZipfile):
                log.exception("Unable to create archive for download")
                raise exceptions.InternalServerError(
                    "Unable to create archive for download.")
            except Exception:
                log.exception(
                    "Unexpected error in create archive for download")
                raise exceptions.InternalServerError(
                    "Unable to create archive for download.")
            composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
            seen = []
            for ld in library_datasets:
                ldda = ld.library_dataset_dataset_association
                ext = ldda.extension
                is_composite = ext in composite_extensions
                path = ""
                parent_folder = ldda.library_dataset.folder
                while parent_folder is not None:
                    # Exclude the now-hidden "root folder"
                    if parent_folder.parent is None:
                        path = os.path.join(parent_folder.library_root[0].name,
                                            path)
                        break
                    path = os.path.join(parent_folder.name, path)
                    parent_folder = parent_folder.parent
                path += ldda.name
                while path in seen:
                    path += '_'
                path = "{path}.{extension}".format(path=path,
                                                   extension=ldda.extension)
                seen.append(path)
                zpath = os.path.split(path)[-1]  # comes as base_name/fname
                outfname, zpathext = os.path.splitext(zpath)

                if is_composite:
                    # need to add all the components from the extra_files_path to the zip
                    if zpathext == '':
                        zpath = '%s.html' % zpath  # fake the real nature of the html file
                    try:
                        if format == 'zip':
                            archive.add(
                                ldda.dataset.file_name,
                                zpath)  # add the primary of a composite set
                        else:
                            archive.add(ldda.dataset.file_name,
                                        zpath,
                                        check_file=True
                                        )  # add the primary of a composite set
                    except IOError:
                        log.exception(
                            "Unable to add composite parent %s to temporary library download archive",
                            ldda.dataset.file_name)
                        raise exceptions.InternalServerError(
                            "Unable to create archive for download.")
                    except ObjectNotFound:
                        log.exception(
                            "Requested dataset %s does not exist on the host.",
                            ldda.dataset.file_name)
                        raise exceptions.ObjectNotFound(
                            "Requested dataset not found. ")
                    except Exception as e:
                        log.exception(
                            "Unable to add composite parent %s to temporary library download archive",
                            ldda.dataset.file_name)
                        raise exceptions.InternalServerError(
                            "Unable to add composite parent to temporary library download archive. "
                            + util.unicodify(e))

                    flist = glob.glob(
                        os.path.join(ldda.dataset.extra_files_path,
                                     '*.*'))  # glob returns full paths
                    for fpath in flist:
                        efp, fname = os.path.split(fpath)
                        if fname > '':
                            fname = fname.translate(trantab)
                        try:
                            if format == 'zip':
                                archive.add(fpath, fname)
                            else:
                                archive.add(fpath, fname, check_file=True)
                        except IOError:
                            log.exception(
                                "Unable to add %s to temporary library download archive %s",
                                fname, outfname)
                            raise exceptions.InternalServerError(
                                "Unable to create archive for download.")
                        except ObjectNotFound:
                            log.exception(
                                "Requested dataset %s does not exist on the host.",
                                fpath)
                            raise exceptions.ObjectNotFound(
                                "Requested dataset not found.")
                        except Exception as e:
                            log.exception(
                                "Unable to add %s to temporary library download archive %s",
                                fname, outfname)
                            raise exceptions.InternalServerError(
                                "Unable to add dataset to temporary library download archive . "
                                + util.unicodify(e))
                else:
                    try:
                        if format == 'zip':
                            archive.add(ldda.dataset.file_name, path)
                        else:
                            archive.add(ldda.dataset.file_name,
                                        path,
                                        check_file=True)
                    except IOError:
                        log.exception(
                            "Unable to write %s to temporary library download archive",
                            ldda.dataset.file_name)
                        raise exceptions.InternalServerError(
                            "Unable to create archive for download")
                    except ObjectNotFound:
                        log.exception(
                            "Requested dataset %s does not exist on the host.",
                            ldda.dataset.file_name)
                        raise exceptions.ObjectNotFound(
                            "Requested dataset not found.")
                    except Exception as e:
                        log.exception(
                            "Unable to add %s to temporary library download archive %s",
                            ldda.dataset.file_name, outfname)
                        raise exceptions.InternalServerError(
                            "Unknown error. " + util.unicodify(e))
            lname = 'selected_dataset'
            fname = lname.replace(' ', '_') + '_files'
            if format == 'zip':
                archive.close()
                trans.response.set_content_type("application/octet-stream")
                trans.response.headers[
                    "Content-Disposition"] = 'attachment; filename="%s.%s"' % (
                        fname, outext)
                archive = util.streamball.ZipBall(tmpf, tmpd)
                archive.wsgi_status = trans.response.wsgi_status()
                archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                return archive.stream
            else:
                trans.response.set_content_type("application/x-tar")
                trans.response.headers[
                    "Content-Disposition"] = 'attachment; filename="%s.%s"' % (
                        fname, outext)
                archive.wsgi_status = trans.response.wsgi_status()
                archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                return archive.stream
        elif format == 'uncompressed':
            if len(library_datasets) != 1:
                raise exceptions.RequestParameterInvalidException(
                    "You can download only one uncompressed file at once.")
            else:
                single_ld = library_datasets[0]
                ldda = single_ld.library_dataset_dataset_association
                dataset = ldda.dataset
                fStat = os.stat(dataset.file_name)
                trans.response.set_content_type(ldda.get_mime())
                trans.response.headers['Content-Length'] = int(fStat.st_size)
                fname = "{path}.{extension}".format(path=ldda.name,
                                                    extension=ldda.extension)
                fname = ''.join(c in util.FILENAME_VALID_CHARS and c or '_'
                                for c in fname)[0:150]
                trans.response.headers[
                    "Content-Disposition"] = 'attachment; filename="%s"' % fname
                try:
                    return open(dataset.file_name, 'rb')
                except Exception:
                    raise exceptions.InternalServerError(
                        "This dataset contains no content.")
        else:
            raise exceptions.RequestParameterInvalidException(
                "Wrong format parameter specified")
Example #7
    def archive(self, trans, history_id, filename='', format='tgz', dry_run=True, **kwd):
        """
        archive( self, trans, history_id, filename='', format='tgz', dry_run=True, **kwd )
        * GET /api/histories/{history_id}/contents/archive/{id}
        * GET /api/histories/{history_id}/contents/archive/{filename}.{format}
            build and return a compressed archive of the selected history contents

        :type   filename:  string
        :param  filename:  (optional) archive name (defaults to history name)
        :type   dry_run:   boolean
        :param  dry_run:   (optional) if True, return the archive and file paths only
                           as json and not an archive file

        :returns:   archive file for download

        .. note:: this is a volatile endpoint and settings and behavior may change.
        """
        # roughly from: http://stackoverflow.com/a/31976060 (windows, linux)
        invalid_filename_char_regex = re.compile(r'[:<>|\\\/\?\* "]')
        # path format string - dot separator between id and name
        id_name_format = u'{}.{}'

        def name_to_filename(name, max_length=150, replace_with=u'_'):
            # TODO: seems like shortening unicode with [:] would cause unpredictable display strings
            return invalid_filename_char_regex.sub(replace_with, name)[0:max_length]

        # given a set of parents for a dataset (HDCAs, DC, DCEs, etc.) - build a directory structure that
        # (roughly) recreates the nesting in the contents using the parent names and ids
        def build_path_from_parents(parents):
            parent_names = []
            for parent in parents:
                # an HDCA
                if hasattr(parent, 'hid'):
                    name = name_to_filename(parent.name)
                    parent_names.append(id_name_format.format(parent.hid, name))
                # a DCE
                elif hasattr(parent, 'element_index'):
                    name = name_to_filename(parent.element_identifier)
                    parent_names.append(id_name_format.format(parent.element_index, name))
            # NOTE: DCs are skipped and use the wrapping DCE info instead
            return parent_names

        # get the history used for the contents query and check for accessibility
        history = self.history_manager.get_accessible(trans.security.decode_id(history_id), trans.user)
        archive_base_name = filename or name_to_filename(history.name)

        # this is the fn applied to each dataset contained in the query
        paths_and_files = []

        def build_archive_files_and_paths(content, *parents):
            archive_path = archive_base_name
            if not self.hda_manager.is_accessible(content, trans.user):
                # if the underlying dataset is not accessible, skip it silently
                return

            content_container_id = content.hid
            content_name = name_to_filename(content.name)
            if parents:
                if hasattr(parents[0], 'element_index'):
                    # if content is directly wrapped in a DCE, strip it from parents (and the resulting path)
                    # and instead replace the content id and name with the DCE index and identifier
                    parent_dce, parents = parents[0], parents[1:]
                    content_container_id = parent_dce.element_index
                    content_name = name_to_filename(parent_dce.element_identifier)
                # reverse for path from parents: oldest parent first
                archive_path = os.path.join(archive_path, *build_path_from_parents(parents)[::-1])
                # TODO: this is brute force - building the path each time instead of re-using it
                # possibly cache

            # add the name as the last element in the archive path
            content_id_and_name = id_name_format.format(content_container_id, content_name)
            archive_path = os.path.join(archive_path, content_id_and_name)

            # ---- for composite files, we use id and name for a directory and, inside that, ...
            if self.hda_manager.is_composite(content):
                # ...save the 'main' composite file (gen. html)
                paths_and_files.append((content.file_name, os.path.join(archive_path, content.name + '.html')))
                for extra_file in self.hda_manager.extra_files(content):
                    extra_file_basename = os.path.basename(extra_file)
                    archive_extra_file_path = os.path.join(archive_path, extra_file_basename)
                    # ...and one for each file in the composite
                    paths_and_files.append((extra_file, archive_extra_file_path))

            # ---- for single files, we add the true extension to id and name and store that single filename
            else:
                # some dataset names can contain their original file extensions, don't repeat
                if not archive_path.endswith('.' + content.extension):
                    archive_path += '.' + content.extension
                paths_and_files.append((content.file_name, archive_path))

        # filter the contents that contain datasets using any filters possible from index above and map the datasets
        filter_params = self.parse_filter_params(kwd)
        filters = self.history_contents_filters.parse_filters(filter_params)
        self.history_contents_manager.map_datasets(history, build_archive_files_and_paths, filters=filters)

        # if dry_run, return the structure as json for debugging
        if dry_run == 'True':
            trans.response.headers['Content-Type'] = 'application/json'
            return safe_dumps(paths_and_files)

        # create the archive, add the dataset files, then stream the archive as a download
        archive_type_string = 'w|gz'
        archive_ext = 'tgz'
        if self.app.config.upstream_gzip:
            archive_type_string = 'w|'
            archive_ext = 'tar'
        archive = StreamBall(archive_type_string)

        for file_path, archive_path in paths_and_files:
            archive.add(file_path, archive_path)

        archive_name = '.'.join([archive_base_name, archive_ext])
        trans.response.set_content_type("application/x-tar")
        trans.response.headers["Content-Disposition"] = 'attachment; filename="{}"'.format(archive_name)
        archive.wsgi_status = trans.response.wsgi_status()
        archive.wsgi_headeritems = trans.response.wsgi_headeritems()
        return archive.stream
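
The sanitizer at the top of this handler is what keeps history, collection, and dataset names usable as archive paths: it substitutes the characters that are invalid on Windows or Linux filesystems, then clamps the length. Lifted out with a small check:

import re

invalid_filename_char_regex = re.compile(r'[:<>|\\\/\?\* "]')

def name_to_filename(name, max_length=150, replace_with='_'):
    # Replace filesystem-hostile characters, then truncate the result.
    return invalid_filename_char_regex.sub(replace_with, name)[0:max_length]

assert name_to_filename('RNA-seq: run 3 "final"?') == 'RNA-seq__run_3__final__'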
Example #8
    def archive(self,
                trans,
                history_id,
                filename='',
                format='tgz',
                dry_run=True,
                **kwd):
        """
        archive( self, trans, history_id, filename='', format='tgz', dry_run=True, **kwd )
        * GET /api/histories/{history_id}/contents/archive/{id}
        * GET /api/histories/{history_id}/contents/archive/{filename}.{format}
            build and return a compressed archive of the selected history contents

        :type   filename:  string
        :param  filename:  (optional) archive name (defaults to history name)
        :type   dry_run:   boolean
        :param  dry_run:   (optional) if True, return the archive and file paths only
                           as json and not an archive file

        :returns:   archive file for download

        .. note:: this is a volatile endpoint and settings and behavior may change.
        """
        # roughly from: http://stackoverflow.com/a/31976060 (windows, linux)
        invalid_filename_char_regex = re.compile(r'[:<>|\\\/\?\* "]')
        # path format string - dot separator between id and name
        id_name_format = u'{}.{}'

        def name_to_filename(name, max_length=150, replace_with=u'_'):
            # TODO: seems like shortening unicode with [:] would cause unpredictable display strings
            return invalid_filename_char_regex.sub(replace_with,
                                                   name)[0:max_length]

        # given a set of parents for a dataset (HDCAs, DC, DCEs, etc.) - build a directory structure that
        # (roughly) recreates the nesting in the contents using the parent names and ids
        def build_path_from_parents(parents):
            parent_names = []
            for parent in parents:
                # an HDCA
                if hasattr(parent, 'hid'):
                    name = name_to_filename(parent.name)
                    parent_names.append(id_name_format.format(
                        parent.hid, name))
                # a DCE
                elif hasattr(parent, 'element_index'):
                    name = name_to_filename(parent.element_identifier)
                    parent_names.append(
                        id_name_format.format(parent.element_index, name))
            # NOTE: DCs are skipped and use the wrapping DCE info instead
            return parent_names

        # get the history used for the contents query and check for accessibility
        history = self.history_manager.get_accessible(
            trans.security.decode_id(history_id), trans.user)
        archive_base_name = filename or name_to_filename(history.name)

        # this is the fn applied to each dataset contained in the query
        paths_and_files = []

        def build_archive_files_and_paths(content, *parents):
            archive_path = archive_base_name
            if not self.hda_manager.is_accessible(content, trans.user):
                # if the underlying dataset is not accessible, skip it silently
                return

            content_container_id = content.hid
            content_name = name_to_filename(content.name)
            if parents:
                if hasattr(parents[0], 'element_index'):
                    # if content is directly wrapped in a DCE, strip it from parents (and the resulting path)
                    # and instead replace the content id and name with the DCE index and identifier
                    parent_dce, parents = parents[0], parents[1:]
                    content_container_id = parent_dce.element_index
                    content_name = name_to_filename(
                        parent_dce.element_identifier)
                # reverse for path from parents: oldest parent first
                archive_path = os.path.join(
                    archive_path,
                    *build_path_from_parents(parents)[::-1])
                # TODO: this is brute force - building the path each time instead of re-using it
                # possibly cache

            # add the name as the last element in the archive path
            content_id_and_name = id_name_format.format(
                content_container_id, content_name)
            archive_path = os.path.join(archive_path, content_id_and_name)

            # ---- for composite files, we use id and name for a directory and, inside that, ...
            if self.hda_manager.is_composite(content):
                # ...save the 'main' composite file (gen. html)
                paths_and_files.append((content.file_name,
                                        os.path.join(archive_path,
                                                     content.name + '.html')))
                for extra_file in self.hda_manager.extra_files(content):
                    extra_file_basename = os.path.basename(extra_file)
                    archive_extra_file_path = os.path.join(
                        archive_path, extra_file_basename)
                    # ...and one for each file in the composite
                    paths_and_files.append(
                        (extra_file, archive_extra_file_path))

            # ---- for single files, we add the true extension to id and name and store that single filename
            else:
                # some dataset names can contain their original file extensions, don't repeat
                if not archive_path.endswith('.' + content.extension):
                    archive_path += '.' + content.extension
                paths_and_files.append((content.file_name, archive_path))

        # filter the contents that contain datasets using any filters possible from index above and map the datasets
        filter_params = self.parse_filter_params(kwd)
        filters = self.history_contents_filters.parse_filters(filter_params)
        self.history_contents_manager.map_datasets(
            history, build_archive_files_and_paths, filters=filters)

        # if dry_run, return the structure as json for debugging
        if dry_run == 'True':
            trans.response.headers['Content-Type'] = 'application/json'
            return safe_dumps(paths_and_files)

        # create the archive, add the dataset files, then stream the archive as a download
        archive_type_string = 'w|gz'
        archive_ext = 'tgz'
        if self.app.config.upstream_gzip:
            archive_type_string = 'w|'
            archive_ext = 'tar'
        archive = StreamBall(archive_type_string)

        for file_path, archive_path in paths_and_files:
            archive.add(file_path, archive_path)

        archive_name = '.'.join([archive_base_name, archive_ext])
        trans.response.set_content_type("application/x-tar")
        trans.response.headers[
            "Content-Disposition"] = 'attachment; filename="{}"'.format(
                archive_name)
        archive.wsgi_status = trans.response.wsgi_status()
        archive.wsgi_headeritems = trans.response.wsgi_headeritems()
        return archive.stream
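
The resulting archive layout is the history name, then one '{id}.{name}' directory per collection level (oldest parent first, hence the [::-1] above), then the dataset entry itself. A hypothetical illustration of the scheme with plain tuples instead of Galaxy's model objects:

import os

id_name_format = '{}.{}'

def archive_path_for(base, parents, content_id, content_name):
    # parents as (id, name) pairs, oldest first (the handler reverses the
    # build_path_from_parents() output with [::-1] to get this order).
    parts = [id_name_format.format(i, n) for i, n in parents]
    return os.path.join(base, *parts,
                        id_name_format.format(content_id, content_name))

print(archive_path_for('my_history', [(3, 'list'), (0, 'pair')],
                       1, 'forward.fastq'))
# -> my_history/3.list/0.pair/1.forward.fastq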