Example no. 1
 def _update_from_file(self, obj, file_name=None, create=False, **kwargs):
     """`create` parameter is not used in this implementation."""
     preserve_symlinks = kwargs.pop('preserve_symlinks', False)
     # FIXME: symlinks and the object store model may not play well together
     # these should be handled better, e.g. registering the symlink'd file
     # as an object
     if create:
         self._create(obj, **kwargs)
     if file_name and self._exists(obj, **kwargs):
         try:
             if preserve_symlinks and os.path.islink(file_name):
                 force_symlink(os.readlink(file_name),
                               self._get_filename(obj, **kwargs))
             else:
                 path = self._get_filename(obj, **kwargs)
                 shutil.copy(file_name, path)
                 umask_fix_perms(path, self.config.umask, 0o666)
         except shutil.SameFileError:
             # That's ok, we need to ignore this so that remote object stores can update
             # the remote object from the cache file path
             pass
         except OSError as ex:
             log.critical(
                 f'Error copying {file_name} to {self._get_filename(obj, **kwargs)}: {ex}'
             )
             raise ex
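
The `umask_fix_perms` helper called throughout these examples comes from galaxy.util. As a rough, hypothetical sketch of its semantics (not the library's actual implementation): it applies the given unmasked permission bits filtered through the configured umask, and optionally fixes group ownership:

import os
import stat

def umask_fix_perms(path, umask, unmasked_perms, gid=None):
    # Hypothetical sketch: the target mode is the unmasked bits minus the
    # umask, e.g. 0o666 & ~0o022 == 0o644.
    perms = unmasked_perms & ~umask
    st = os.stat(path)
    if stat.S_IMODE(st.st_mode) != perms:
        try:
            os.chmod(path, perms)
        except OSError:
            pass  # not the file's owner; leave the mode as-is
    if gid is not None and st.st_gid != gid:
        try:
            os.chown(path, -1, gid)  # -1 leaves the uid unchanged
        except OSError:
            pass  # insufficient privileges to change the group
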
Example no. 2
 def _fix_permissions(self, rel_path):
     """ Set permissions on rel_path"""
     for basedir, _, files in os.walk(rel_path):
         umask_fix_perms(basedir, self.config.umask, 0o777, self.config.gid)
         for filename in files:
             path = os.path.join(basedir, filename)
             # Ignore symlinks
             if os.path.islink(path):
                 continue
             umask_fix_perms(path, self.config.umask, 0o666, self.config.gid)
Example no. 3
 def _fix_permissions(self, rel_path):
     """ Set permissions on rel_path"""
     for basedir, _, files in os.walk(rel_path):
         umask_fix_perms(basedir, self.config.umask, 0o777, self.config.gid)
         for filename in files:
             path = os.path.join(basedir, filename)
             # Ignore symlinks
             if os.path.islink(path):
                 continue
             umask_fix_perms(path, self.config.umask, 0o666, self.config.gid)
Example no. 4
 def create(self, obj, **kwargs):
     """Override `ObjectStore`'s stub by creating any files and folders on disk."""
     if not self.exists(obj, **kwargs):
         path = self._construct_path(obj, **kwargs)
         dir_only = kwargs.get('dir_only', False)
         # Create directory if it does not exist
         dir = path if dir_only else os.path.dirname(path)
         safe_makedirs(dir)
         # Create the file if it does not exist
         if not dir_only:
             open(path, 'w').close()  # Should be rb?
             umask_fix_perms(path, self.config.umask, 0o666)
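
Note that Example no. 4 relies on a `safe_makedirs` helper, which avoids the check-then-create race visible in Example no. 5 below (the directory could appear between `os.path.exists` and `os.makedirs`). A minimal sketch of such a helper, assuming it simply tolerates an already-existing directory:

import errno
import os

def safe_makedirs(path):
    # Sketch: create path and any missing parents, ignoring only the
    # "already exists" error so that concurrent callers don't race.
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
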
Example no. 5
 def create(self, obj, **kwargs):
     if not self.exists(obj, **kwargs):
         path = self._construct_path(obj, **kwargs)
         dir_only = kwargs.get('dir_only', False)
         # Create directory if it does not exist
         dir = path if dir_only else os.path.dirname(path)
         if not os.path.exists(dir):
             os.makedirs(dir)
         # Create the file if it does not exist
         if not dir_only:
             open(path, 'w').close()  # Should be rb?
             umask_fix_perms(path, self.config.umask, 0o666)
Example no. 6
 def create(self, obj, **kwargs):
     if not self.exists(obj, **kwargs):
         path = self._construct_path(obj, **kwargs)
         dir_only = kwargs.get('dir_only', False)
         # Create directory if it does not exist
         dir = path if dir_only else os.path.dirname(path)
         if not os.path.exists(dir):
             os.makedirs(dir)
         # Create the file if it does not exist
         if not dir_only:
             open(path, 'w').close()  # Should be rb?
             umask_fix_perms(path, self.config.umask, 0o666)
Example no. 7
 def update_from_file(self, obj, file_name=None, create=False, **kwargs):
     """`create` parameter is not used in this implementation."""
     preserve_symlinks = kwargs.pop('preserve_symlinks', False)
     # FIXME: symlinks and the object store model may not play well together
     # these should be handled better, e.g. registering the symlink'd file
     # as an object
     if create:
         self.create(obj, **kwargs)
     if file_name and self.exists(obj, **kwargs):
         try:
             if preserve_symlinks and os.path.islink(file_name):
                 force_symlink(os.readlink(file_name), self.get_filename(obj, **kwargs))
             else:
                 path = self.get_filename(obj, **kwargs)
                 shutil.move(file_name, path)
                 umask_fix_perms(path, self.config.umask, 0o666)
         except IOError as ex:
             log.critical('Error moving %s to %s: %s' % (file_name, self.get_filename(obj, **kwargs), ex))
             raise ex
Example no. 8
 def update_from_file(self, obj, file_name=None, create=False, **kwargs):
     """`create` parameter is not used in this implementation."""
     preserve_symlinks = kwargs.pop('preserve_symlinks', False)
     # FIXME: symlinks and the object store model may not play well together
     # these should be handled better, e.g. registering the symlink'd file
     # as an object
     if create:
         self.create(obj, **kwargs)
     if file_name and self.exists(obj, **kwargs):
         try:
             if preserve_symlinks and os.path.islink(file_name):
                 force_symlink(os.readlink(file_name), self.get_filename(obj, **kwargs))
             else:
                 path = self.get_filename(obj, **kwargs)
                 shutil.copy(file_name, path)
                 umask_fix_perms(path, self.config.umask, 0o666)
         except IOError as ex:
             log.critical('Error copying %s to %s: %s' % (file_name, self.get_filename(obj, **kwargs), ex))
             raise ex
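
Examples no. 1, 7, and 8 all delegate symlink preservation to a `force_symlink` helper. A plausible sketch, assuming its job is simply to replace any existing link at the destination (remove, then recreate):

import errno
import os

def force_symlink(source, link_name):
    # Sketch: like os.symlink, but overwrite link_name if it already exists.
    try:
        os.symlink(source, link_name)
    except OSError as e:
        if e.errno == errno.EEXIST:
            os.remove(link_name)
            os.symlink(source, link_name)
        else:
            raise
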
Example no. 9
                    raise exceptions.InternalServerError( 'Internal error.' )
                except Exception as e:
                    raise exceptions.InternalServerError( 'Unknown error.' )
        else:
            raise exceptions.RequestParameterMissingException( 'Request has to contain a list of dataset ids to download.' )

        if format in [ 'zip', 'tgz', 'tbz' ]:
            # error = False
            killme = string.punctuation + string.whitespace
            trantab = string.maketrans( killme, '_'*len( killme ) )
            try:
                outext = 'zip'
                if format == 'zip':
                    # Can't use mkstemp - the file must not exist first
                    tmpd = tempfile.mkdtemp()
                    util.umask_fix_perms( tmpd, trans.app.config.umask, 0o777, self.app.config.gid )
                    tmpf = os.path.join( tmpd, 'library_download.' + format )
                    if trans.app.config.upstream_gzip:
                        archive = zipfile.ZipFile( tmpf, 'w', zipfile.ZIP_STORED, True )
                    else:
                        archive = zipfile.ZipFile( tmpf, 'w', zipfile.ZIP_DEFLATED, True )
                    archive.add = lambda x, y: archive.write( x, y.encode( 'CP437' ) )
                elif format == 'tgz':
                    if trans.app.config.upstream_gzip:
                        archive = StreamBall( 'w|' )
                        outext = 'tar'
                    else:
                        archive = StreamBall( 'w|gz' )
                        outext = 'tgz'
                elif format == 'tbz':
                    archive = StreamBall( 'w|bz2' )
Example no. 10
    def _archive_composite_dataset(self, trans, data=None, **kwd):
        # save a composite object into a compressed archive for downloading
        params = util.Params(kwd)
        outfname = data.name[0:150]
        outfname = ''.join(c in FILENAME_VALID_CHARS and c or '_'
                           for c in outfname)
        if params.do_action is None:
            params.do_action = 'zip'  # default
        msg = util.restore_text(params.get('msg', ''))
        if not data:
            msg = "You must select at least one dataset"
        else:
            error = False
            try:
                if params.do_action == 'zip':
                    # Can't use mkstemp - the file must not exist first
                    tmpd = tempfile.mkdtemp()
                    util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777,
                                         trans.app.config.gid)
                    tmpf = os.path.join(tmpd,
                                        'library_download.' + params.do_action)
                    archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED,
                                              True)
                    archive.add = lambda x, y: archive.write(
                        x, y.encode('CP437'))
                elif params.do_action == 'tgz':
                    archive = util.streamball.StreamBall('w|gz')
                elif params.do_action == 'tbz':
                    archive = util.streamball.StreamBall('w|bz2')
            except (OSError, zipfile.BadZipFile):
                error = True
                log.exception("Unable to create archive for download")
                msg = "Unable to create archive for %s for download, please report this error" % outfname
            if not error:
                ext = data.extension
                path = data.file_name
                fname = os.path.split(path)[-1]
                efp = data.extra_files_path
                # Add any central file to the archive,

                display_name = os.path.splitext(outfname)[0]
                if not display_name.endswith(ext):
                    display_name = '%s_%s' % (display_name, ext)

                error, msg = self._archive_main_file(archive, display_name,
                                                     path)[:2]
                if not error:
                    # Add any child files to the archive,
                    for root, dirs, files in os.walk(efp):
                        for fname in files:
                            fpath = os.path.join(root, fname)
                            rpath = os.path.relpath(fpath, efp)
                            try:
                                archive.add(fpath, rpath)
                            except IOError:
                                error = True
                                log.exception(
                                    "Unable to add %s to temporary library download archive"
                                    % rpath)
                                msg = "Unable to create archive for download, please report this error"
                                continue
                if not error:
                    if params.do_action == 'zip':
                        archive.close()
                        tmpfh = open(tmpf, 'rb')
                        # CANNOT clean up - unlink/rmdir was always failing because file handle retained to return - must rely on a cron job to clean up tmp
                        trans.response.set_content_type(
                            "application/x-zip-compressed")
                        trans.response.headers[
                            "Content-Disposition"] = 'attachment; filename="%s.zip"' % outfname
                        return tmpfh
                    else:
                        trans.response.set_content_type("application/x-tar")
                        outext = 'tgz'
                        if params.do_action == 'tbz':
                            outext = 'tbz'
                        trans.response.headers[
                            "Content-Disposition"] = 'attachment; filename="%s.%s"' % (
                                outfname, outext)
                        archive.wsgi_status = trans.response.wsgi_status()
                        archive.wsgi_headeritems = trans.response.wsgi_headeritems(
                        )
                        return archive.stream
        return trans.show_error_message(msg)
Example no. 11
    def _archive_composite_dataset( self, trans, data=None, **kwd ):
        # save a composite object into a compressed archive for downloading
        params = util.Params( kwd )
        valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        outfname = data.name[0:150]
        outfname = ''.join(c in valid_chars and c or '_' for c in outfname)
        if params.do_action is None:
            params.do_action = 'zip'  # default
        msg = util.restore_text( params.get( 'msg', ''  ) )
        if not data:
            msg = "You must select at least one dataset"
        else:
            error = False
            try:
                if params.do_action == 'zip':
                    # Can't use mkstemp - the file must not exist first
                    tmpd = tempfile.mkdtemp()
                    util.umask_fix_perms( tmpd, trans.app.config.umask, 0o777, trans.app.config.gid )
                    tmpf = os.path.join( tmpd, 'library_download.' + params.do_action )
                    archive = zipfile.ZipFile( tmpf, 'w', zipfile.ZIP_DEFLATED, True )
                    archive.add = lambda x, y: archive.write( x, y.encode('CP437') )
                elif params.do_action == 'tgz':
                    archive = util.streamball.StreamBall( 'w|gz' )
                elif params.do_action == 'tbz':
                    archive = util.streamball.StreamBall( 'w|bz2' )
            except (OSError, zipfile.BadZipFile):
                error = True
                log.exception( "Unable to create archive for download" )
                msg = "Unable to create archive for %s for download, please report this error" % outfname
            if not error:
                ext = data.extension
                path = data.file_name
                fname = os.path.split(path)[-1]
                efp = data.extra_files_path
                # Add any central file to the archive,

                display_name = os.path.splitext(outfname)[0]
                if not display_name.endswith(ext):
                    display_name = '%s_%s' % (display_name, ext)

                error, msg = self._archive_main_file(archive, display_name, path)[:2]
                if not error:
                    # Add any child files to the archive,
                    for root, dirs, files in os.walk(efp):
                        for fname in files:
                            fpath = os.path.join(root, fname)
                            rpath = os.path.relpath(fpath, efp)
                            try:
                                archive.add( fpath, rpath )
                            except IOError:
                                error = True
                                log.exception( "Unable to add %s to temporary library download archive" % rpath)
                                msg = "Unable to create archive for download, please report this error"
                                continue
                if not error:
                    if params.do_action == 'zip':
                        archive.close()
                        tmpfh = open( tmpf, 'rb' )
                        # CANNOT clean up - unlink/rmdir was always failing because file handle retained to return - must rely on a cron job to clean up tmp
                        trans.response.set_content_type( "application/x-zip-compressed" )
                        trans.response.headers[ "Content-Disposition" ] = 'attachment; filename="%s.zip"' % outfname
                        return tmpfh
                    else:
                        trans.response.set_content_type( "application/x-tar" )
                        outext = 'tgz'
                        if params.do_action == 'tbz':
                            outext = 'tbz'
                        trans.response.headers[ "Content-Disposition" ] = 'attachment; filename="%s.%s"' % (outfname, outext)
                        archive.wsgi_status = trans.response.wsgi_status()
                        archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                        return archive.stream
        return trans.show_error_message( msg )
Example no. 12
    def download(self, trans, format, **kwd):
        """
        Download requested datasets (identified by encoded IDs) in requested format.

        * GET /api/libraries/datasets/download/{format}
        * POST /api/libraries/datasets/download/{format}

        example: ``GET localhost:8080/api/libraries/datasets/download/tbz?ld_ids%255B%255D=a0d84b45643a2678&ld_ids%255B%255D=fe38c84dcd46c828``

        .. note:: supported format values are: 'zip', 'tgz', 'tbz', 'uncompressed'

        :param  format:      string representing requested archive format
        :type   format:      string
        :param  ld_ids[]:      an array of encoded dataset ids
        :type   ld_ids[]:      an array
        :param  folder_ids[]:      an array of encoded folder ids
        :type   folder_ids[]:      an array

        :returns: either archive with the requested datasets packed inside or a single uncompressed dataset
        :rtype:   file

        :raises: MessageException, ItemDeletionException, ItemAccessibilityException, HTTPBadRequest, OSError, IOError, ObjectNotFound
        """
        library_datasets = []
        datasets_to_download = kwd.get('ld_ids%5B%5D', None)
        if datasets_to_download is None:
            datasets_to_download = kwd.get('ld_ids', None)
        if datasets_to_download is not None:
            datasets_to_download = util.listify(datasets_to_download)
            for dataset_id in datasets_to_download:
                try:
                    library_dataset = self.get_library_dataset(trans, id=dataset_id, check_ownership=False, check_accessible=True)
                    library_datasets.append(library_dataset)
                except HTTPBadRequest:
                    raise exceptions.RequestParameterInvalidException('Bad Request.')
                except HTTPInternalServerError:
                    raise exceptions.InternalServerError('Internal error.')
                except Exception as e:
                    raise exceptions.InternalServerError('Unknown error.' + str(e))

        folders_to_download = kwd.get('folder_ids%5B%5D', None)
        if folders_to_download is None:
            folders_to_download = kwd.get('folder_ids', None)
        if folders_to_download is not None:
            folders_to_download = util.listify(folders_to_download)

            current_user_roles = trans.get_current_user_roles()

            def traverse(folder):
                admin = trans.user_is_admin()
                rval = []
                for subfolder in folder.active_folders:
                    if not admin:
                        can_access, folder_ids = trans.app.security_agent.check_folder_contents(trans.user, current_user_roles, subfolder)
                    if (admin or can_access) and not subfolder.deleted:
                        rval.extend(traverse(subfolder))
                for ld in folder.datasets:
                    if not admin:
                        can_access = trans.app.security_agent.can_access_dataset(
                            current_user_roles,
                            ld.library_dataset_dataset_association.dataset
                        )
                    if (admin or can_access) and not ld.deleted:
                        rval.append(ld)
                return rval

            for encoded_folder_id in folders_to_download:
                folder_id = self.folder_manager.cut_and_decode(trans, encoded_folder_id)
                folder = self.folder_manager.get(trans, folder_id)
                library_datasets.extend(traverse(folder))

        if not library_datasets:
            raise exceptions.RequestParameterMissingException('Request has to contain a list of dataset ids or folder ids to download.')

        if format in ['zip', 'tgz', 'tbz']:
            # error = False
            killme = string.punctuation + string.whitespace
            trantab = string.maketrans(killme, '_' * len(killme))
            try:
                outext = 'zip'
                if format == 'zip':
                    # Can't use mkstemp - the file must not exist first
                    tmpd = tempfile.mkdtemp()
                    util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777, self.app.config.gid)
                    tmpf = os.path.join(tmpd, 'library_download.' + format)
                    if trans.app.config.upstream_gzip:
                        archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_STORED, True)
                    else:
                        archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED, True)
                    archive.add = lambda x, y: archive.write(x, y.encode('CP437'))
                elif format == 'tgz':
                    if trans.app.config.upstream_gzip:
                        archive = StreamBall('w|')
                        outext = 'tar'
                    else:
                        archive = StreamBall('w|gz')
                        outext = 'tgz'
                elif format == 'tbz':
                    archive = StreamBall('w|bz2')
                    outext = 'tbz2'
            except (OSError, zipfile.BadZipfile):
                log.exception("Unable to create archive for download")
                raise exceptions.InternalServerError("Unable to create archive for download.")
            except Exception:
                log.exception("Unexpected error in create archive for download")
                raise exceptions.InternalServerError("Unable to create archive for download.")
            composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
            seen = []
            for ld in library_datasets:
                ldda = ld.library_dataset_dataset_association
                ext = ldda.extension
                is_composite = ext in composite_extensions
                path = ""
                parent_folder = ldda.library_dataset.folder
                while parent_folder is not None:
                    # Exclude the now-hidden "root folder"
                    if parent_folder.parent is None:
                        path = os.path.join(parent_folder.library_root[0].name, path)
                        break
                    path = os.path.join(parent_folder.name, path)
                    parent_folder = parent_folder.parent
                path += ldda.name
                while path in seen:
                    path += '_'
                seen.append(path)
                zpath = os.path.split(path)[-1]  # comes as base_name/fname
                outfname, zpathext = os.path.splitext(zpath)

                if is_composite:
                    # need to add all the components from the extra_files_path to the zip
                    if zpathext == '':
                        zpath = '%s.html' % zpath  # fake the real nature of the html file
                    try:
                        if format == 'zip':
                            archive.add(ldda.dataset.file_name, zpath)  # add the primary of a composite set
                        else:
                            archive.add(ldda.dataset.file_name, zpath, check_file=True)  # add the primary of a composite set
                    except IOError:
                        log.exception("Unable to add composite parent %s to temporary library download archive", ldda.dataset.file_name)
                        raise exceptions.InternalServerError("Unable to create archive for download.")
                    except ObjectNotFound:
                        log.exception("Requested dataset %s does not exist on the host.", ldda.dataset.file_name)
                        raise exceptions.ObjectNotFound("Requested dataset not found. ")
                    except Exception as e:
                        log.exception("Unable to add composite parent %s to temporary library download archive", ldda.dataset.file_name)
                        raise exceptions.InternalServerError("Unable to add composite parent to temporary library download archive. " + str(e))

                    flist = glob.glob(os.path.join(ldda.dataset.extra_files_path, '*.*'))  # glob returns full paths
                    for fpath in flist:
                        efp, fname = os.path.split(fpath)
                        if fname > '':
                            fname = fname.translate(trantab)
                        try:
                            if format == 'zip':
                                archive.add(fpath, fname)
                            else:
                                archive.add(fpath, fname, check_file=True)
                        except IOError:
                            log.exception("Unable to add %s to temporary library download archive %s", fname, outfname)
                            raise exceptions.InternalServerError("Unable to create archive for download.")
                        except ObjectNotFound:
                            log.exception("Requested dataset %s does not exist on the host.", fpath)
                            raise exceptions.ObjectNotFound("Requested dataset not found.")
                        except Exception as e:
                            log.exception("Unable to add %s to temporary library download archive %s" % (fname, outfname))
                            raise exceptions.InternalServerError("Unable to add dataset to temporary library download archive . " + str(e))
                else:
                    try:
                        if format == 'zip':
                            archive.add(ldda.dataset.file_name, path)
                        else:
                            archive.add(ldda.dataset.file_name, path, check_file=True)
                    except IOError:
                        log.exception("Unable to write %s to temporary library download archive", ldda.dataset.file_name)
                        raise exceptions.InternalServerError("Unable to create archive for download")
                    except ObjectNotFound:
                        log.exception("Requested dataset %s does not exist on the host.", ldda.dataset.file_name)
                        raise exceptions.ObjectNotFound("Requested dataset not found.")
                    except Exception as e:
                        log.exception("Unable to add %s to temporary library download archive %s", fname, outfname)
                        raise exceptions.InternalServerError("Unknown error. " + str(e))
            lname = 'selected_dataset'
            fname = lname.replace(' ', '_') + '_files'
            if format == 'zip':
                archive.close()
                trans.response.set_content_type("application/octet-stream")
                trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (fname, outext)
                archive = util.streamball.ZipBall(tmpf, tmpd)
                archive.wsgi_status = trans.response.wsgi_status()
                archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                return archive.stream
            else:
                trans.response.set_content_type("application/x-tar")
                trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (fname, outext)
                archive.wsgi_status = trans.response.wsgi_status()
                archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                return archive.stream
        elif format == 'uncompressed':
            if len(library_datasets) != 1:
                raise exceptions.RequestParameterInvalidException("You can download only one uncompressed file at once.")
            else:
                single_ld = library_datasets[0]
                ldda = single_ld.library_dataset_dataset_association
                dataset = ldda.dataset
                fStat = os.stat(dataset.file_name)
                trans.response.set_content_type(ldda.get_mime())
                trans.response.headers['Content-Length'] = int(fStat.st_size)
                fname = ldda.name
                fname = ''.join(c in util.FILENAME_VALID_CHARS and c or '_' for c in fname)[0:150]
                trans.response.headers["Content-Disposition"] = 'attachment; filename="%s"' % fname
                try:
                    return open(dataset.file_name, 'rb')
                except Exception:
                    raise exceptions.InternalServerError("This dataset contains no content.")
        else:
            raise exceptions.RequestParameterInvalidException("Wrong format parameter specified")
Example no. 13
    def download(self, trans, format, **kwd):
        """
        GET /api/libraries/datasets/download/{format}
        POST /api/libraries/datasets/download/{format}

        Download requested datasets (identified by encoded IDs) in requested format.

        example: ``GET localhost:8080/api/libraries/datasets/download/tbz?ld_ids%255B%255D=a0d84b45643a2678&ld_ids%255B%255D=fe38c84dcd46c828``

        .. note:: supported format values are: 'zip', 'tgz', 'tbz', 'uncompressed'

        :param  format:      string representing requested archive format
        :type   format:      string
        :param  ld_ids[]:      an array of encoded dataset ids
        :type   ld_ids[]:      an array
        :param  folder_ids[]:      an array of encoded folder ids
        :type   folder_ids[]:      an array

        :returns: either archive with the requested datasets packed inside or a single uncompressed dataset
        :rtype:   file

        :raises: MessageException, ItemDeletionException, ItemAccessibilityException, HTTPBadRequest, OSError, IOError, ObjectNotFound
        """
        library_datasets = []
        datasets_to_download = kwd.get('ld_ids%5B%5D', None)
        if datasets_to_download is None:
            datasets_to_download = kwd.get('ld_ids', None)
        if datasets_to_download is not None:
            datasets_to_download = util.listify(datasets_to_download)
            for dataset_id in datasets_to_download:
                try:
                    library_dataset = self.get_library_dataset(
                        trans,
                        id=dataset_id,
                        check_ownership=False,
                        check_accessible=True)
                    library_datasets.append(library_dataset)
                except HTTPBadRequest:
                    raise exceptions.RequestParameterInvalidException(
                        'Bad Request.')
                except HTTPInternalServerError:
                    raise exceptions.InternalServerError('Internal error.')
                except Exception as e:
                    raise exceptions.InternalServerError('Unknown error.' +
                                                         util.unicodify(e))

        folders_to_download = kwd.get('folder_ids%5B%5D', None)
        if folders_to_download is None:
            folders_to_download = kwd.get('folder_ids', None)
        if folders_to_download is not None:
            folders_to_download = util.listify(folders_to_download)

            current_user_roles = trans.get_current_user_roles()

            def traverse(folder):
                admin = trans.user_is_admin
                rval = []
                for subfolder in folder.active_folders:
                    if not admin:
                        can_access, folder_ids = trans.app.security_agent.check_folder_contents(
                            trans.user, current_user_roles, subfolder)
                    if (admin or can_access) and not subfolder.deleted:
                        rval.extend(traverse(subfolder))
                for ld in folder.datasets:
                    if not admin:
                        can_access = trans.app.security_agent.can_access_dataset(
                            current_user_roles,
                            ld.library_dataset_dataset_association.dataset)
                    if (admin or can_access) and not ld.deleted:
                        rval.append(ld)
                return rval

            for encoded_folder_id in folders_to_download:
                folder_id = self.folder_manager.cut_and_decode(
                    trans, encoded_folder_id)
                folder = self.folder_manager.get(trans, folder_id)
                library_datasets.extend(traverse(folder))

        if not library_datasets:
            raise exceptions.RequestParameterMissingException(
                'Request has to contain a list of dataset ids or folder ids to download.'
            )

        if format in ['zip', 'tgz', 'tbz']:
            # error = False
            killme = string.punctuation + string.whitespace
            trantab = str.maketrans(killme, '_' * len(killme))
            try:
                outext = 'zip'
                if format == 'zip':
                    # Can't use mkstemp - the file must not exist first
                    tmpd = tempfile.mkdtemp()
                    util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777,
                                         self.app.config.gid)
                    tmpf = os.path.join(tmpd, 'library_download.' + format)
                    if trans.app.config.upstream_gzip:
                        archive = zipfile.ZipFile(tmpf, 'w',
                                                  zipfile.ZIP_STORED, True)
                    else:
                        archive = zipfile.ZipFile(tmpf, 'w',
                                                  zipfile.ZIP_DEFLATED, True)
                    archive.add = lambda x, y: archive.write(
                        x, y.encode('CP437'))
                elif format == 'tgz':
                    if trans.app.config.upstream_gzip:
                        archive = StreamBall('w|')
                        outext = 'tar'
                    else:
                        archive = StreamBall('w|gz')
                        outext = 'tgz'
                elif format == 'tbz':
                    archive = StreamBall('w|bz2')
                    outext = 'tbz2'
            except (OSError, zipfile.BadZipfile):
                log.exception("Unable to create archive for download")
                raise exceptions.InternalServerError(
                    "Unable to create archive for download.")
            except Exception:
                log.exception(
                    "Unexpected error in create archive for download")
                raise exceptions.InternalServerError(
                    "Unable to create archive for download.")
            composite_extensions = trans.app.datatypes_registry.get_composite_extensions(
            )
            seen = []
            for ld in library_datasets:
                ldda = ld.library_dataset_dataset_association
                ext = ldda.extension
                is_composite = ext in composite_extensions
                path = ""
                parent_folder = ldda.library_dataset.folder
                while parent_folder is not None:
                    # Exclude the now-hidden "root folder"
                    if parent_folder.parent is None:
                        path = os.path.join(parent_folder.library_root[0].name,
                                            path)
                        break
                    path = os.path.join(parent_folder.name, path)
                    parent_folder = parent_folder.parent
                path += ldda.name
                while path in seen:
                    path += '_'
                path = "{path}.{extension}".format(path=path,
                                                   extension=ldda.extension)
                seen.append(path)
                zpath = os.path.split(path)[-1]  # comes as base_name/fname
                outfname, zpathext = os.path.splitext(zpath)

                if is_composite:
                    # need to add all the components from the extra_files_path to the zip
                    if zpathext == '':
                        zpath = '%s.html' % zpath  # fake the real nature of the html file
                    try:
                        if format == 'zip':
                            archive.add(
                                ldda.dataset.file_name,
                                zpath)  # add the primary of a composite set
                        else:
                            archive.add(ldda.dataset.file_name,
                                        zpath,
                                        check_file=True
                                        )  # add the primary of a composite set
                    except IOError:
                        log.exception(
                            "Unable to add composite parent %s to temporary library download archive",
                            ldda.dataset.file_name)
                        raise exceptions.InternalServerError(
                            "Unable to create archive for download.")
                    except ObjectNotFound:
                        log.exception(
                            "Requested dataset %s does not exist on the host.",
                            ldda.dataset.file_name)
                        raise exceptions.ObjectNotFound(
                            "Requested dataset not found. ")
                    except Exception as e:
                        log.exception(
                            "Unable to add composite parent %s to temporary library download archive",
                            ldda.dataset.file_name)
                        raise exceptions.InternalServerError(
                            "Unable to add composite parent to temporary library download archive. "
                            + util.unicodify(e))

                    flist = glob.glob(
                        os.path.join(ldda.dataset.extra_files_path,
                                     '*.*'))  # glob returns full paths
                    for fpath in flist:
                        efp, fname = os.path.split(fpath)
                        if fname > '':
                            fname = fname.translate(trantab)
                        try:
                            if format == 'zip':
                                archive.add(fpath, fname)
                            else:
                                archive.add(fpath, fname, check_file=True)
                        except IOError:
                            log.exception(
                                "Unable to add %s to temporary library download archive %s",
                                fname, outfname)
                            raise exceptions.InternalServerError(
                                "Unable to create archive for download.")
                        except ObjectNotFound:
                            log.exception(
                                "Requested dataset %s does not exist on the host.",
                                fpath)
                            raise exceptions.ObjectNotFound(
                                "Requested dataset not found.")
                        except Exception as e:
                            log.exception(
                                "Unable to add %s to temporary library download archive %s",
                                fname, outfname)
                            raise exceptions.InternalServerError(
                                "Unable to add dataset to temporary library download archive . "
                                + util.unicodify(e))
                else:
                    try:
                        if format == 'zip':
                            archive.add(ldda.dataset.file_name, path)
                        else:
                            archive.add(ldda.dataset.file_name,
                                        path,
                                        check_file=True)
                    except IOError:
                        log.exception(
                            "Unable to write %s to temporary library download archive",
                            ldda.dataset.file_name)
                        raise exceptions.InternalServerError(
                            "Unable to create archive for download")
                    except ObjectNotFound:
                        log.exception(
                            "Requested dataset %s does not exist on the host.",
                            ldda.dataset.file_name)
                        raise exceptions.ObjectNotFound(
                            "Requested dataset not found.")
                    except Exception as e:
                        log.exception(
                            "Unable to add %s to temporary library download archive %s",
                            ldda.dataset.file_name, outfname)
                        raise exceptions.InternalServerError(
                            "Unknown error. " + util.unicodify(e))
            lname = 'selected_dataset'
            fname = lname.replace(' ', '_') + '_files'
            if format == 'zip':
                archive.close()
                trans.response.set_content_type("application/octet-stream")
                trans.response.headers[
                    "Content-Disposition"] = 'attachment; filename="%s.%s"' % (
                        fname, outext)
                archive = util.streamball.ZipBall(tmpf, tmpd)
                archive.wsgi_status = trans.response.wsgi_status()
                archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                return archive.stream
            else:
                trans.response.set_content_type("application/x-tar")
                trans.response.headers[
                    "Content-Disposition"] = 'attachment; filename="%s.%s"' % (
                        fname, outext)
                archive.wsgi_status = trans.response.wsgi_status()
                archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                return archive.stream
        elif format == 'uncompressed':
            if len(library_datasets) != 1:
                raise exceptions.RequestParameterInvalidException(
                    "You can download only one uncompressed file at once.")
            else:
                single_ld = library_datasets[0]
                ldda = single_ld.library_dataset_dataset_association
                dataset = ldda.dataset
                fStat = os.stat(dataset.file_name)
                trans.response.set_content_type(ldda.get_mime())
                trans.response.headers['Content-Length'] = int(fStat.st_size)
                fname = "{path}.{extension}".format(path=ldda.name,
                                                    extension=ldda.extension)
                fname = ''.join(c in util.FILENAME_VALID_CHARS and c or '_'
                                for c in fname)[0:150]
                trans.response.headers[
                    "Content-Disposition"] = 'attachment; filename="%s"' % fname
                try:
                    return open(dataset.file_name, 'rb')
                except Exception:
                    raise exceptions.InternalServerError(
                        "This dataset contains no content.")
        else:
            raise exceptions.RequestParameterInvalidException(
                "Wrong format parameter specified")
Example no. 14
    def _archive_composite_dataset(self, trans, data=None, **kwd):
        # save a composite object into a compressed archive for downloading
        params = util.Params(kwd)
        outfname = data.name[0:150]
        outfname = ''.join(c in FILENAME_VALID_CHARS and c or '_' for c in outfname)
        if params.do_action is None:
            params.do_action = 'zip'  # default
        msg = util.restore_text(params.get('msg', ''))
        if not data:
            msg = "You must select at least one dataset"
        else:
            error = False
            try:
                if params.do_action == 'zip':
                    # Can't use mkstemp - the file must not exist first
                    tmpd = tempfile.mkdtemp(dir=trans.app.config.new_file_path, prefix='gx_composite_archive_')
                    util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777, trans.app.config.gid)
                    tmpf = os.path.join(tmpd, 'library_download.' + params.do_action)
                    archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED, True)

                    def zipfile_add(fpath, arcname):
                        encoded_arcname = arcname.encode('CP437')
                        try:
                            archive.write(fpath, encoded_arcname)
                        except TypeError:
                            # Despite documenting the need for CP437 encoded arcname,
                            # python 3 actually needs this to be a unicode string ...
                            # https://bugs.python.org/issue24110
                            archive.write(fpath, arcname)

                    archive.add = zipfile_add

                elif params.do_action == 'tgz':
                    archive = util.streamball.StreamBall('w|gz')
                elif params.do_action == 'tbz':
                    archive = util.streamball.StreamBall('w|bz2')
            except (OSError, zipfile.BadZipFile):
                error = True
                log.exception("Unable to create archive for download")
                msg = "Unable to create archive for %s for download, please report this error" % outfname
            if not error:
                ext = data.extension
                path = data.file_name
                efp = data.extra_files_path
                # Add any central file to the archive,

                display_name = os.path.splitext(outfname)[0]
                if not display_name.endswith(ext):
                    display_name = '%s_%s' % (display_name, ext)

                error, msg = self._archive_main_file(archive, display_name, path)[:2]
                if not error:
                    # Add any child files to the archive,
                    for fpath, rpath in self.__archive_extra_files_path(extra_files_path=efp):
                        try:
                            archive.add(fpath, rpath)
                        except IOError:
                            error = True
                            log.exception("Unable to add %s to temporary library download archive", rpath)
                            msg = "Unable to create archive for download, please report this error"
                            continue
                if not error:
                    if params.do_action == 'zip':
                        archive.close()
                        tmpfh = open(tmpf, 'rb')
                        # CANNOT clean up - unlink/rmdir was always failing because file handle retained to return - must rely on a cron job to clean up tmp
                        trans.response.set_content_type("application/x-zip-compressed")
                        trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.zip"' % outfname
                        return tmpfh
                    else:
                        trans.response.set_content_type("application/x-tar")
                        outext = 'tgz'
                        if params.do_action == 'tbz':
                            outext = 'tbz'
                        trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (outfname, outext)
                        archive.wsgi_status = trans.response.wsgi_status()
                        archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                        return archive.stream
        return trans.show_error_message(msg)
Example no. 15
    def finish( self, stdout, stderr ):
        """
        Called to indicate that the associated command has been run. Updates
        the output datasets based on stderr and stdout from the command, and
        the contents of the output files.
        """
        # default post job setup
        self.sa_session.expunge_all()
        job = self.get_job()

        try:
            self.reclaim_ownership()
        except Exception:
            self.fail( job.info )
            log.exception( '(%s) Failed to change ownership of %s, failing' % ( job.id, self.working_directory ) )

        # if the job was deleted, don't finish it
        if job.state == job.states.DELETED or job.state == job.states.ERROR:
            #ERROR at this point means the job was deleted by an administrator.
            return self.fail( job.info )
        if stderr:
            job.state = job.states.ERROR
        else:
            job.state = job.states.OK
        if self.version_string_cmd:
            version_filename = self.get_version_string_path()
            if os.path.exists(version_filename):
                self.version_string = open(version_filename).read()
                os.unlink(version_filename)

        if self.app.config.outputs_to_working_directory and not self.__link_file_check():
            for dataset_path in self.get_output_fnames():
                try:
                    shutil.move( dataset_path.false_path, dataset_path.real_path )
                    log.debug( "finish(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                except ( IOError, OSError ):
                    # this can happen if Galaxy is restarted during the job's
                    # finish method - the false_path file has already moved,
                    # and when the job is recovered, it won't be found.
                    if os.path.exists( dataset_path.real_path ) and os.stat( dataset_path.real_path ).st_size > 0:
                        log.warning( "finish(): %s not found, but %s is not empty, so it will be used instead" % ( dataset_path.false_path, dataset_path.real_path ) )
                    else:
                        return self.fail( "Job %s's output dataset(s) could not be read" % job.id )
        job_context = ExpressionContext( dict( stdout = stdout, stderr = stderr ) )
        job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )
        def in_directory( file, directory ):
            # Make both absolute.
            directory = os.path.abspath( directory )
            file = os.path.abspath( file )

            #Return true, if the common prefix of both is equal to directory
            #e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
            return os.path.commonprefix( [ file, directory ] ) == directory
        for dataset_assoc in job.output_datasets + job.output_library_datasets:
            context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
            #should this also be checking library associations? - can a library item be added from a history before the job has ended? - lets not allow this to occur
            for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations: #need to update all associated output hdas, i.e. history was shared with job running
                #
                # If HDA is to be copied from the working directory, do it now so that other attributes are correctly set.
                #
                if isinstance( dataset, model.HistoryDatasetAssociation ):
                    joda = self.sa_session.query( model.JobToOutputDatasetAssociation ).filter_by( job=job, dataset=dataset ).first()
                    if joda and job_tool:
                        hda_tool_output = job_tool.outputs.get( joda.name, None )
                        if hda_tool_output and hda_tool_output.from_work_dir:
                            # Copy from working dir to HDA.
                            source_file = os.path.join( os.path.abspath( self.working_directory ), hda_tool_output.from_work_dir )
                            if in_directory( source_file, self.working_directory ):
                                try:
                                    shutil.move( source_file, dataset.file_name )
                                    log.debug( "finish(): Moved %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
                                except ( IOError, OSError ):
                                    log.debug( "finish(): Could not move %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
                            else:
                                # Security violation.
                                log.exception( "from_work_dir specified a location not in the working directory: %s, %s" % ( source_file, self.working_directory ) )

                dataset.blurb = 'done'
                dataset.peek  = 'no peek'
                dataset.info = ( dataset.info  or '' ) + context['stdout'] + context['stderr']
                dataset.tool_version = self.version_string
                dataset.set_size()
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                if context['stderr']:
                    dataset.blurb = "error"
                elif dataset.has_data():
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get( 'ext', 'data' )
                        dataset.init_meta( copy_from=dataset )
                    #if a dataset was copied, it won't appear in our dictionary:
                    #either use the metadata from originating output dataset, or call set_meta on the copies
                    #it would be quicker to just copy the metadata from the originating output dataset,
                    #but somewhat trickier (need to recurse up the copied_from tree), for now we'll call set_meta()
                    if not self.app.config.set_metadata_externally or \
                     ( not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) \
                       and self.app.config.retry_metadata_internally ):
                        dataset.set_meta( overwrite = False )
                    elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and not context['stderr']:
                        dataset._state = model.Dataset.states.FAILED_METADATA
                    else:
                        #load metadata from file
                        #we need to no longer allow metadata to be edited while the job is still running,
                        #since if it is edited, the metadata changed on the running output will no longer match
                        #the metadata that was stored to disk for use via the external process,
                        #and the changes made by the user will be lost, without warning or notice
                        dataset.metadata.from_JSON_dict( self.external_output_metadata.get_output_filenames_by_dataset( dataset, self.sa_session ).filename_out )
                    try:
                        assert context.get( 'line_count', None ) is not None
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( line_count=context['line_count'], is_multi_byte=True )
                        else:
                            dataset.set_peek( line_count=context['line_count'] )
                    except Exception:
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( is_multi_byte=True )
                        else:
                            dataset.set_peek()
                    try:
                        # set the name if provided by the tool
                        dataset.name = context['name']
                    except Exception:
                        pass
                else:
                    dataset.blurb = "empty"
                    if dataset.ext == 'auto':
                        dataset.extension = 'txt'
                self.sa_session.add( dataset )
            if context['stderr']:
                dataset_assoc.dataset.dataset.state = model.Dataset.states.ERROR
            else:
                dataset_assoc.dataset.dataset.state = model.Dataset.states.OK
            # If any of the rest of the finish method below raises an
            # exception, the fail method will run and set the datasets to
            # ERROR.  The user will never see that the datasets are in error if
            # they were flushed as OK here, since upon doing so, the history
            # panel stops checking for updates.  So allow the
            # self.sa_session.flush() at the bottom of this method set
            # the state instead.

        for pja in job.post_job_actions:
            ActionBox.execute(self.app, self.sa_session, pja.post_job_action, job)
        # Flush all the dataset and job changes above.  Dataset state changes
        # will now be seen by the user.
        self.sa_session.flush()
        # Save stdout and stderr
        if len( stdout ) > 32768:
            log.error( "stdout for job %d is greater than 32K, only first part will be logged to database" % job.id )
        job.stdout = stdout[:32768]
        if len( stderr ) > 32768:
            log.error( "stderr for job %d is greater than 32K, only first part will be logged to database" % job.id )
        job.stderr = stderr[:32768]
        # custom post process setup
        inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
        out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] )
        inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
        out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] )
        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] ) # why not re-use self.param_dict here? ##dunno...probably should, this causes tools.parameters.basic.UnvalidatedValue to be used in following methods instead of validated and transformed values during i.e. running workflows
        param_dict = self.tool.params_from_strings( param_dict, self.app )
        # Check for and move associated_files
        self.tool.collect_associated_files(out_data, self.working_directory)
        gitd = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
        if gitd:
            self.tool.collect_associated_files({'' : gitd}, self.working_directory)
        # Create generated output children and primary datasets and add to param_dict
        collected_datasets = {'children':self.tool.collect_child_datasets(out_data, self.working_directory),'primary':self.tool.collect_primary_datasets(out_data, self.working_directory)}
        param_dict.update({'__collected_datasets__':collected_datasets})
        # Certain tools require tasks to be completed after job execution
        # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ).
        self.tool.exec_after_process( self.queue.app, inp_data, out_data, param_dict, job = job )
        # Call 'exec_after_process' hook
        self.tool.call_hook( 'exec_after_process', self.queue.app, inp_data=inp_data,
                             out_data=out_data, param_dict=param_dict,
                             tool=self.tool, stdout=stdout, stderr=stderr )
        job.command_line = self.command_line

        bytes = 0
        # Once datasets are collected, set the total dataset size (includes extra files)
        for dataset_assoc in job.output_datasets:
            dataset_assoc.dataset.dataset.set_total_size()
            bytes += dataset_assoc.dataset.dataset.get_total_size()

        if job.user:
            job.user.total_disk_usage += bytes

        # fix permissions
        for path in [ dp.real_path for dp in self.get_output_fnames() ]:
            util.umask_fix_perms( path, self.app.config.umask, 0o666, self.app.config.gid )
        self.sa_session.flush()
        log.debug( 'job %d ended' % self.job_id )
        if self.app.config.cleanup_job == 'always' or ( not stderr and self.app.config.cleanup_job == 'onsuccess' ):
            self.cleanup()
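
One caveat on the `in_directory` check in Example no. 15: `os.path.commonprefix` compares strings character by character, so `/a/bc/file` would be accepted as being inside `/a/b`. A stricter sketch (Python 3.4+, using `os.path.commonpath` and resolving symlinks first):

import os

def in_directory(path, directory):
    # Sketch: resolve both paths, then compare whole path components
    # rather than raw character prefixes.
    directory = os.path.realpath(directory)
    path = os.path.realpath(path)
    return os.path.commonpath([path, directory]) == directory
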