def _update_from_file(self, obj, file_name=None, create=False, **kwargs):
    """`create` parameter is not used in this implementation."""
    preserve_symlinks = kwargs.pop('preserve_symlinks', False)
    # FIXME: symlinks and the object store model may not play well together
    # these should be handled better, e.g. registering the symlink'd file
    # as an object
    if create:
        self._create(obj, **kwargs)
    if file_name and self._exists(obj, **kwargs):
        try:
            if preserve_symlinks and os.path.islink(file_name):
                force_symlink(os.readlink(file_name), self._get_filename(obj, **kwargs))
            else:
                path = self._get_filename(obj, **kwargs)
                shutil.copy(file_name, path)
                umask_fix_perms(path, self.config.umask, 0o666)
        except shutil.SameFileError:
            # That's ok, we need to ignore this so that remote object stores can update
            # the remote object from the cache file path
            pass
        except OSError as ex:
            log.critical(f'Error copying {file_name} to {self._get_filename(obj, **kwargs)}: {ex}')
            raise ex
def _fix_permissions(self, rel_path):
    """Set permissions on rel_path"""
    for basedir, _, files in os.walk(rel_path):
        umask_fix_perms(basedir, self.config.umask, 0o777, self.config.gid)
        for filename in files:
            path = os.path.join(basedir, filename)
            # Ignore symlinks
            if os.path.islink(path):
                continue
            umask_fix_perms(path, self.config.umask, 0o666, self.config.gid)
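# umask_fix_perms is called throughout this file but defined elsewhere.
# A minimal sketch of its likely behavior, assuming the signature
# (path, umask, unmasked_perms, gid=None) seen at the call sites; not the
# exact library implementation:
import os

def umask_fix_perms(path, umask, unmasked_perms, gid=None):
    # Apply the process umask to the requested permissions, e.g.
    # 0o666 & ~0o022 == 0o644.
    perms = unmasked_perms & ~umask
    try:
        os.chmod(path, perms)
        if gid is not None and hasattr(os, 'chown'):
            # -1 keeps the current owner; only the group changes.
            os.chown(path, -1, gid)
    except OSError:
        # Best-effort: the caller may not own the file.
        pass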
def create(self, obj, **kwargs):
    """Override `ObjectStore`'s stub by creating any files and folders on disk."""
    if not self.exists(obj, **kwargs):
        path = self._construct_path(obj, **kwargs)
        dir_only = kwargs.get('dir_only', False)
        # Create directory if it does not exist
        dir = path if dir_only else os.path.dirname(path)
        safe_makedirs(dir)
        # Create the file if it does not exist
        if not dir_only:
            open(path, 'w').close()  # Should be rb?
            umask_fix_perms(path, self.config.umask, 0o666)
def create(self, obj, **kwargs):
    if not self.exists(obj, **kwargs):
        path = self._construct_path(obj, **kwargs)
        dir_only = kwargs.get('dir_only', False)
        # Create directory if it does not exist
        dir = path if dir_only else os.path.dirname(path)
        if not os.path.exists(dir):
            os.makedirs(dir)
        # Create the file if it does not exist
        if not dir_only:
            open(path, 'w').close()  # Should be rb?
            umask_fix_perms(path, self.config.umask, 0o666)
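# The first create() above calls safe_makedirs where the second inlines a
# racy `if not os.path.exists(dir): os.makedirs(dir)` check. A sketch of what
# safe_makedirs presumably does -- tolerate the directory appearing between
# the check and the call (an assumption; the helper is defined elsewhere):
import errno
import os

def safe_makedirs(path):
    try:
        os.makedirs(path)
    except OSError as e:
        # Re-raise unless another writer created the directory first.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise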
def update_from_file(self, obj, file_name=None, create=False, **kwargs):
    """`create` parameter is not used in this implementation."""
    preserve_symlinks = kwargs.pop('preserve_symlinks', False)
    # FIXME: symlinks and the object store model may not play well together
    # these should be handled better, e.g. registering the symlink'd file
    # as an object
    if create:
        self.create(obj, **kwargs)
    if file_name and self.exists(obj, **kwargs):
        try:
            if preserve_symlinks and os.path.islink(file_name):
                force_symlink(os.readlink(file_name), self.get_filename(obj, **kwargs))
            else:
                path = self.get_filename(obj, **kwargs)
                shutil.move(file_name, path)
                umask_fix_perms(path, self.config.umask, 0o666)
        except IOError as ex:
            log.critical('Error copying %s to %s: %s' % (file_name, self.get_filename(obj, **kwargs), ex))
            raise ex
def update_from_file(self, obj, file_name=None, create=False, **kwargs):
    """`create` parameter is not used in this implementation."""
    preserve_symlinks = kwargs.pop('preserve_symlinks', False)
    # FIXME: symlinks and the object store model may not play well together
    # these should be handled better, e.g. registering the symlink'd file
    # as an object
    if create:
        self.create(obj, **kwargs)
    if file_name and self.exists(obj, **kwargs):
        try:
            if preserve_symlinks and os.path.islink(file_name):
                force_symlink(os.readlink(file_name), self.get_filename(obj, **kwargs))
            else:
                path = self.get_filename(obj, **kwargs)
                shutil.copy(file_name, path)
                umask_fix_perms(path, self.config.umask, 0o666)
        except IOError as ex:
            log.critical('Error copying %s to %s: %s' % (file_name, self.get_filename(obj, **kwargs), ex))
            raise ex
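# force_symlink is used by the update_from_file variants above but not shown.
# A sketch of the likely semantics -- replace any existing link at the target
# path (an assumption about a helper defined elsewhere):
import errno
import os

def force_symlink(source, link_name):
    try:
        os.symlink(source, link_name)
    except OSError as e:
        if e.errno == errno.EEXIST:
            os.remove(link_name)
            os.symlink(source, link_name)
        else:
            raise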
                raise exceptions.InternalServerError( 'Internal error.' )
            except Exception as e:
                raise exceptions.InternalServerError( 'Unknown error.' )
    else:
        raise exceptions.RequestParameterMissingException( 'Request has to contain a list of dataset ids to download.' )
    if format in [ 'zip', 'tgz', 'tbz' ]:
        # error = False
        killme = string.punctuation + string.whitespace
        trantab = string.maketrans( killme, '_' * len( killme ) )
        try:
            outext = 'zip'
            if format == 'zip':
                # Can't use mkstemp - the file must not exist first
                tmpd = tempfile.mkdtemp()
                util.umask_fix_perms( tmpd, trans.app.config.umask, 0o777, self.app.config.gid )
                tmpf = os.path.join( tmpd, 'library_download.' + format )
                if trans.app.config.upstream_gzip:
                    archive = zipfile.ZipFile( tmpf, 'w', zipfile.ZIP_STORED, True )
                else:
                    archive = zipfile.ZipFile( tmpf, 'w', zipfile.ZIP_DEFLATED, True )
                archive.add = lambda x, y: archive.write( x, y.encode( 'CP437' ) )
            elif format == 'tgz':
                if trans.app.config.upstream_gzip:
                    archive = StreamBall( 'w|' )
                    outext = 'tar'
                else:
                    archive = StreamBall( 'w|gz' )
                    outext = 'tgz'
            elif format == 'tbz':
                archive = StreamBall( 'w|bz2' )
def _archive_composite_dataset(self, trans, data=None, **kwd):
    # save a composite object into a compressed archive for downloading
    params = util.Params(kwd)
    outfname = data.name[0:150]
    outfname = ''.join(c in FILENAME_VALID_CHARS and c or '_' for c in outfname)
    if params.do_action is None:
        params.do_action = 'zip'  # default
    msg = util.restore_text(params.get('msg', ''))
    if not data:
        msg = "You must select at least one dataset"
    else:
        error = False
        try:
            if params.do_action == 'zip':
                # Can't use mkstemp - the file must not exist first
                tmpd = tempfile.mkdtemp()
                util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777, trans.app.config.gid)
                tmpf = os.path.join(tmpd, 'library_download.' + params.do_action)
                archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED, True)
                archive.add = lambda x, y: archive.write(x, y.encode('CP437'))
            elif params.do_action == 'tgz':
                archive = util.streamball.StreamBall('w|gz')
            elif params.do_action == 'tbz':
                archive = util.streamball.StreamBall('w|bz2')
        except (OSError, zipfile.BadZipFile):
            error = True
            log.exception("Unable to create archive for download")
            msg = "Unable to create archive for %s for download, please report this error" % outfname
        if not error:
            ext = data.extension
            path = data.file_name
            fname = os.path.split(path)[-1]
            efp = data.extra_files_path
            # Add any central file to the archive,
            display_name = os.path.splitext(outfname)[0]
            if not display_name.endswith(ext):
                display_name = '%s_%s' % (display_name, ext)
            error, msg = self._archive_main_file(archive, display_name, path)[:2]
            if not error:
                # Add any child files to the archive,
                for root, dirs, files in os.walk(efp):
                    for fname in files:
                        fpath = os.path.join(root, fname)
                        rpath = os.path.relpath(fpath, efp)
                        try:
                            archive.add(fpath, rpath)
                        except IOError:
                            error = True
                            log.exception("Unable to add %s to temporary library download archive" % rpath)
                            msg = "Unable to create archive for download, please report this error"
                            continue
            if not error:
                if params.do_action == 'zip':
                    archive.close()
                    tmpfh = open(tmpf)
                    # CANNOT clean up - unlink/rmdir was always failing because file handle retained to return - must rely on a cron job to clean up tmp
                    trans.response.set_content_type("application/x-zip-compressed")
                    trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.zip"' % outfname
                    return tmpfh
                else:
                    trans.response.set_content_type("application/x-tar")
                    outext = 'tgz'
                    if params.do_action == 'tbz':
                        outext = 'tbz'
                    trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (outfname, outext)
                    archive.wsgi_status = trans.response.wsgi_status()
                    archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                    return archive.stream
    return trans.show_error_message(msg)
def _archive_composite_dataset( self, trans, data=None, **kwd ):
    # save a composite object into a compressed archive for downloading
    params = util.Params( kwd )
    valid_chars = '.,^_-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    outfname = data.name[0:150]
    outfname = ''.join(c in valid_chars and c or '_' for c in outfname)
    if params.do_action is None:
        params.do_action = 'zip'  # default
    msg = util.restore_text( params.get( 'msg', '' ) )
    if not data:
        msg = "You must select at least one dataset"
    else:
        error = False
        try:
            if params.do_action == 'zip':
                # Can't use mkstemp - the file must not exist first
                tmpd = tempfile.mkdtemp()
                util.umask_fix_perms( tmpd, trans.app.config.umask, 0o777, trans.app.config.gid )
                tmpf = os.path.join( tmpd, 'library_download.' + params.do_action )
                archive = zipfile.ZipFile( tmpf, 'w', zipfile.ZIP_DEFLATED, True )
                archive.add = lambda x, y: archive.write( x, y.encode('CP437') )
            elif params.do_action == 'tgz':
                archive = util.streamball.StreamBall( 'w|gz' )
            elif params.do_action == 'tbz':
                archive = util.streamball.StreamBall( 'w|bz2' )
        except (OSError, zipfile.BadZipFile):
            error = True
            log.exception( "Unable to create archive for download" )
            msg = "Unable to create archive for %s for download, please report this error" % outfname
        if not error:
            ext = data.extension
            path = data.file_name
            fname = os.path.split(path)[-1]
            efp = data.extra_files_path
            # Add any central file to the archive,
            display_name = os.path.splitext(outfname)[0]
            if not display_name.endswith(ext):
                display_name = '%s_%s' % (display_name, ext)
            error, msg = self._archive_main_file(archive, display_name, path)[:2]
            if not error:
                # Add any child files to the archive,
                for root, dirs, files in os.walk(efp):
                    for fname in files:
                        fpath = os.path.join(root, fname)
                        rpath = os.path.relpath(fpath, efp)
                        try:
                            archive.add( fpath, rpath )
                        except IOError:
                            error = True
                            log.exception( "Unable to add %s to temporary library download archive" % rpath )
                            msg = "Unable to create archive for download, please report this error"
                            continue
            if not error:
                if params.do_action == 'zip':
                    archive.close()
                    tmpfh = open( tmpf )
                    # CANNOT clean up - unlink/rmdir was always failing because file handle retained to return - must rely on a cron job to clean up tmp
                    trans.response.set_content_type( "application/x-zip-compressed" )
                    trans.response.headers[ "Content-Disposition" ] = 'attachment; filename="%s.zip"' % outfname
                    return tmpfh
                else:
                    trans.response.set_content_type( "application/x-tar" )
                    outext = 'tgz'
                    if params.do_action == 'tbz':
                        outext = 'tbz'
                    trans.response.headers[ "Content-Disposition" ] = 'attachment; filename="%s.%s"' % (outfname, outext)
                    archive.wsgi_status = trans.response.wsgi_status()
                    archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                    return archive.stream
    return trans.show_error_message( msg )
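# StreamBall (from galaxy.util.streamball) is used above but not defined in
# this listing. A simplified sketch of its apparent contract -- queue
# (path, arcname) pairs, then emit a tar archive with one of tarfile's
# non-seekable 'w|', 'w|gz', 'w|bz2' modes -- under the assumption that the
# real class streams through the WSGI response rather than a local file:
import os
import tarfile

class StreamBall(object):
    def __init__(self, mode):
        self.mode = mode      # tarfile stream mode: 'w|', 'w|gz', or 'w|bz2'
        self.members = []

    def add(self, filename, arcname, check_file=False):
        # check_file mirrors the keyword used by download() below.
        if check_file and not os.path.exists(filename):
            raise OSError('No such file: %s' % filename)
        self.members.append((filename, arcname))

    def write_to(self, fileobj):
        # 'w|...' modes write sequentially, so fileobj only needs write().
        with tarfile.open(fileobj=fileobj, mode=self.mode) as tar:
            for filename, arcname in self.members:
                tar.add(filename, arcname=arcname)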
def download(self, trans, format, **kwd):
    """
    Download requested datasets (identified by encoded IDs) in requested format.

    * GET /api/libraries/datasets/download/{format}
    * POST /api/libraries/datasets/download/{format}

    example: ``GET localhost:8080/api/libraries/datasets/download/tbz?ld_ids%255B%255D=a0d84b45643a2678&ld_ids%255B%255D=fe38c84dcd46c828``

    .. note:: supported format values are: 'zip', 'tgz', 'tbz', 'uncompressed'

    :param  format:        string representing requested archive format
    :type   format:        string
    :param  ld_ids[]:      an array of encoded dataset ids
    :type   ld_ids[]:      an array
    :param  folder_ids[]:  an array of encoded folder ids
    :type   folder_ids[]:  an array

    :returns: either archive with the requested datasets packed inside or a single uncompressed dataset
    :rtype:   file

    :raises: MessageException, ItemDeletionException, ItemAccessibilityException, HTTPBadRequest, OSError, IOError, ObjectNotFound
    """
    library_datasets = []
    datasets_to_download = kwd.get('ld_ids%5B%5D', None)
    if datasets_to_download is None:
        datasets_to_download = kwd.get('ld_ids', None)
    if datasets_to_download is not None:
        datasets_to_download = util.listify(datasets_to_download)
        for dataset_id in datasets_to_download:
            try:
                library_dataset = self.get_library_dataset(trans, id=dataset_id, check_ownership=False, check_accessible=True)
                library_datasets.append(library_dataset)
            except HTTPBadRequest:
                raise exceptions.RequestParameterInvalidException('Bad Request.')
            except HTTPInternalServerError:
                raise exceptions.InternalServerError('Internal error.')
            except Exception as e:
                raise exceptions.InternalServerError('Unknown error.' + str(e))
    folders_to_download = kwd.get('folder_ids%5B%5D', None)
    if folders_to_download is None:
        folders_to_download = kwd.get('folder_ids', None)
    if folders_to_download is not None:
        folders_to_download = util.listify(folders_to_download)
        current_user_roles = trans.get_current_user_roles()

        def traverse(folder):
            admin = trans.user_is_admin()
            rval = []
            for subfolder in folder.active_folders:
                if not admin:
                    can_access, folder_ids = trans.app.security_agent.check_folder_contents(trans.user, current_user_roles, subfolder)
                if (admin or can_access) and not subfolder.deleted:
                    rval.extend(traverse(subfolder))
            for ld in folder.datasets:
                if not admin:
                    can_access = trans.app.security_agent.can_access_dataset(current_user_roles, ld.library_dataset_dataset_association.dataset)
                if (admin or can_access) and not ld.deleted:
                    rval.append(ld)
            return rval

        for encoded_folder_id in folders_to_download:
            folder_id = self.folder_manager.cut_and_decode(trans, encoded_folder_id)
            folder = self.folder_manager.get(trans, folder_id)
            library_datasets.extend(traverse(folder))
    if not library_datasets:
        raise exceptions.RequestParameterMissingException('Request has to contain a list of dataset ids or folder ids to download.')
    if format in ['zip', 'tgz', 'tbz']:
        # error = False
        killme = string.punctuation + string.whitespace
        trantab = string.maketrans(killme, '_' * len(killme))
        try:
            outext = 'zip'
            if format == 'zip':
                # Can't use mkstemp - the file must not exist first
                tmpd = tempfile.mkdtemp()
                util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777, self.app.config.gid)
                tmpf = os.path.join(tmpd, 'library_download.' + format)
                if trans.app.config.upstream_gzip:
                    archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_STORED, True)
                else:
                    archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED, True)
                archive.add = lambda x, y: archive.write(x, y.encode('CP437'))
            elif format == 'tgz':
                if trans.app.config.upstream_gzip:
                    archive = StreamBall('w|')
                    outext = 'tar'
                else:
                    archive = StreamBall('w|gz')
                    outext = 'tgz'
            elif format == 'tbz':
                archive = StreamBall('w|bz2')
                outext = 'tbz2'
        except (OSError, zipfile.BadZipfile):
            log.exception("Unable to create archive for download")
            raise exceptions.InternalServerError("Unable to create archive for download.")
        except Exception:
            log.exception("Unexpected error in create archive for download")
            raise exceptions.InternalServerError("Unable to create archive for download.")
        composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
        seen = []
        for ld in library_datasets:
            ldda = ld.library_dataset_dataset_association
            ext = ldda.extension
            is_composite = ext in composite_extensions
            path = ""
            parent_folder = ldda.library_dataset.folder
            while parent_folder is not None:
                # Exclude the now-hidden "root folder"
                if parent_folder.parent is None:
                    path = os.path.join(parent_folder.library_root[0].name, path)
                    break
                path = os.path.join(parent_folder.name, path)
                parent_folder = parent_folder.parent
            path += ldda.name
            while path in seen:
                path += '_'
            seen.append(path)
            zpath = os.path.split(path)[-1]  # comes as base_name/fname
            outfname, zpathext = os.path.splitext(zpath)
            if is_composite:
                # need to add all the components from the extra_files_path to the zip
                if zpathext == '':
                    zpath = '%s.html' % zpath  # fake the real nature of the html file
                try:
                    if format == 'zip':
                        archive.add(ldda.dataset.file_name, zpath)  # add the primary of a composite set
                    else:
                        archive.add(ldda.dataset.file_name, zpath, check_file=True)  # add the primary of a composite set
                except IOError:
                    log.exception("Unable to add composite parent %s to temporary library download archive", ldda.dataset.file_name)
                    raise exceptions.InternalServerError("Unable to create archive for download.")
                except ObjectNotFound:
                    log.exception("Requested dataset %s does not exist on the host.", ldda.dataset.file_name)
                    raise exceptions.ObjectNotFound("Requested dataset not found.")
                except Exception as e:
                    log.exception("Unable to add composite parent %s to temporary library download archive", ldda.dataset.file_name)
                    raise exceptions.InternalServerError("Unable to add composite parent to temporary library download archive. " + str(e))
                flist = glob.glob(os.path.join(ldda.dataset.extra_files_path, '*.*'))  # glob returns full paths
                for fpath in flist:
                    efp, fname = os.path.split(fpath)
                    if fname > '':
                        fname = fname.translate(trantab)
                    try:
                        if format == 'zip':
                            archive.add(fpath, fname)
                        else:
                            archive.add(fpath, fname, check_file=True)
                    except IOError:
                        log.exception("Unable to add %s to temporary library download archive %s", fname, outfname)
                        raise exceptions.InternalServerError("Unable to create archive for download.")
                    except ObjectNotFound:
                        log.exception("Requested dataset %s does not exist on the host.", fpath)
                        raise exceptions.ObjectNotFound("Requested dataset not found.")
                    except Exception as e:
                        log.exception("Unable to add %s to temporary library download archive %s" % (fname, outfname))
                        raise exceptions.InternalServerError("Unable to add dataset to temporary library download archive. " + str(e))
            else:
                try:
                    if format == 'zip':
                        archive.add(ldda.dataset.file_name, path)
                    else:
                        archive.add(ldda.dataset.file_name, path, check_file=True)
                except IOError:
                    log.exception("Unable to write %s to temporary library download archive", ldda.dataset.file_name)
                    raise exceptions.InternalServerError("Unable to create archive for download")
                except ObjectNotFound:
                    log.exception("Requested dataset %s does not exist on the host.", ldda.dataset.file_name)
                    raise exceptions.ObjectNotFound("Requested dataset not found.")
                except Exception as e:
                    log.exception("Unable to add %s to temporary library download archive %s", ldda.dataset.file_name, outfname)
                    raise exceptions.InternalServerError("Unknown error. " + str(e))
        lname = 'selected_dataset'
        fname = lname.replace(' ', '_') + '_files'
        if format == 'zip':
            archive.close()
            trans.response.set_content_type("application/octet-stream")
            trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (fname, outext)
            archive = util.streamball.ZipBall(tmpf, tmpd)
            archive.wsgi_status = trans.response.wsgi_status()
            archive.wsgi_headeritems = trans.response.wsgi_headeritems()
            return archive.stream
        else:
            trans.response.set_content_type("application/x-tar")
            trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (fname, outext)
            archive.wsgi_status = trans.response.wsgi_status()
            archive.wsgi_headeritems = trans.response.wsgi_headeritems()
            return archive.stream
    elif format == 'uncompressed':
        if len(library_datasets) != 1:
            raise exceptions.RequestParameterInvalidException("You can download only one uncompressed file at once.")
        else:
            single_ld = library_datasets[0]
            ldda = single_ld.library_dataset_dataset_association
            dataset = ldda.dataset
            fStat = os.stat(dataset.file_name)
            trans.response.set_content_type(ldda.get_mime())
            trans.response.headers['Content-Length'] = int(fStat.st_size)
            fname = ldda.name
            fname = ''.join(c in util.FILENAME_VALID_CHARS and c or '_' for c in fname)[0:150]
            trans.response.headers["Content-Disposition"] = 'attachment; filename="%s"' % fname
            try:
                return open(dataset.file_name)
            except Exception:
                raise exceptions.InternalServerError("This dataset contains no content.")
    else:
        raise exceptions.RequestParameterInvalidException("Wrong format parameter specified")
def download(self, trans, format, **kwd):
    """
    GET /api/libraries/datasets/download/{format}
    POST /api/libraries/datasets/download/{format}

    Download requested datasets (identified by encoded IDs) in requested format.

    example: ``GET localhost:8080/api/libraries/datasets/download/tbz?ld_ids%255B%255D=a0d84b45643a2678&ld_ids%255B%255D=fe38c84dcd46c828``

    .. note:: supported format values are: 'zip', 'tgz', 'tbz', 'uncompressed'

    :param  format:        string representing requested archive format
    :type   format:        string
    :param  ld_ids[]:      an array of encoded dataset ids
    :type   ld_ids[]:      an array
    :param  folder_ids[]:  an array of encoded folder ids
    :type   folder_ids[]:  an array

    :returns: either archive with the requested datasets packed inside or a single uncompressed dataset
    :rtype:   file

    :raises: MessageException, ItemDeletionException, ItemAccessibilityException, HTTPBadRequest, OSError, IOError, ObjectNotFound
    """
    library_datasets = []
    datasets_to_download = kwd.get('ld_ids%5B%5D', None)
    if datasets_to_download is None:
        datasets_to_download = kwd.get('ld_ids', None)
    if datasets_to_download is not None:
        datasets_to_download = util.listify(datasets_to_download)
        for dataset_id in datasets_to_download:
            try:
                library_dataset = self.get_library_dataset(trans, id=dataset_id, check_ownership=False, check_accessible=True)
                library_datasets.append(library_dataset)
            except HTTPBadRequest:
                raise exceptions.RequestParameterInvalidException('Bad Request.')
            except HTTPInternalServerError:
                raise exceptions.InternalServerError('Internal error.')
            except Exception as e:
                raise exceptions.InternalServerError('Unknown error.' + util.unicodify(e))
    folders_to_download = kwd.get('folder_ids%5B%5D', None)
    if folders_to_download is None:
        folders_to_download = kwd.get('folder_ids', None)
    if folders_to_download is not None:
        folders_to_download = util.listify(folders_to_download)
        current_user_roles = trans.get_current_user_roles()

        def traverse(folder):
            admin = trans.user_is_admin
            rval = []
            for subfolder in folder.active_folders:
                if not admin:
                    can_access, folder_ids = trans.app.security_agent.check_folder_contents(trans.user, current_user_roles, subfolder)
                if (admin or can_access) and not subfolder.deleted:
                    rval.extend(traverse(subfolder))
            for ld in folder.datasets:
                if not admin:
                    can_access = trans.app.security_agent.can_access_dataset(current_user_roles, ld.library_dataset_dataset_association.dataset)
                if (admin or can_access) and not ld.deleted:
                    rval.append(ld)
            return rval

        for encoded_folder_id in folders_to_download:
            folder_id = self.folder_manager.cut_and_decode(trans, encoded_folder_id)
            folder = self.folder_manager.get(trans, folder_id)
            library_datasets.extend(traverse(folder))
    if not library_datasets:
        raise exceptions.RequestParameterMissingException('Request has to contain a list of dataset ids or folder ids to download.')
    if format in ['zip', 'tgz', 'tbz']:
        # error = False
        killme = string.punctuation + string.whitespace
        trantab = str.maketrans(killme, '_' * len(killme))  # string.maketrans does not exist on Python 3
        try:
            outext = 'zip'
            if format == 'zip':
                # Can't use mkstemp - the file must not exist first
                tmpd = tempfile.mkdtemp()
                util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777, self.app.config.gid)
                tmpf = os.path.join(tmpd, 'library_download.' + format)
                if trans.app.config.upstream_gzip:
                    archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_STORED, True)
                else:
                    archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED, True)
                archive.add = lambda x, y: archive.write(x, y.encode('CP437'))
            elif format == 'tgz':
                if trans.app.config.upstream_gzip:
                    archive = StreamBall('w|')
                    outext = 'tar'
                else:
                    archive = StreamBall('w|gz')
                    outext = 'tgz'
            elif format == 'tbz':
                archive = StreamBall('w|bz2')
                outext = 'tbz2'
        except (OSError, zipfile.BadZipfile):
            log.exception("Unable to create archive for download")
            raise exceptions.InternalServerError("Unable to create archive for download.")
        except Exception:
            log.exception("Unexpected error in create archive for download")
            raise exceptions.InternalServerError("Unable to create archive for download.")
        composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
        seen = []
        for ld in library_datasets:
            ldda = ld.library_dataset_dataset_association
            ext = ldda.extension
            is_composite = ext in composite_extensions
            path = ""
            parent_folder = ldda.library_dataset.folder
            while parent_folder is not None:
                # Exclude the now-hidden "root folder"
                if parent_folder.parent is None:
                    path = os.path.join(parent_folder.library_root[0].name, path)
                    break
                path = os.path.join(parent_folder.name, path)
                parent_folder = parent_folder.parent
            path += ldda.name
            while path in seen:
                path += '_'
            path = "{path}.{extension}".format(path=path, extension=ldda.extension)
            seen.append(path)
            zpath = os.path.split(path)[-1]  # comes as base_name/fname
            outfname, zpathext = os.path.splitext(zpath)
            if is_composite:
                # need to add all the components from the extra_files_path to the zip
                if zpathext == '':
                    zpath = '%s.html' % zpath  # fake the real nature of the html file
                try:
                    if format == 'zip':
                        archive.add(ldda.dataset.file_name, zpath)  # add the primary of a composite set
                    else:
                        archive.add(ldda.dataset.file_name, zpath, check_file=True)  # add the primary of a composite set
                except IOError:
                    log.exception("Unable to add composite parent %s to temporary library download archive", ldda.dataset.file_name)
                    raise exceptions.InternalServerError("Unable to create archive for download.")
                except ObjectNotFound:
                    log.exception("Requested dataset %s does not exist on the host.", ldda.dataset.file_name)
                    raise exceptions.ObjectNotFound("Requested dataset not found.")
                except Exception as e:
                    log.exception("Unable to add composite parent %s to temporary library download archive", ldda.dataset.file_name)
                    raise exceptions.InternalServerError("Unable to add composite parent to temporary library download archive. " + util.unicodify(e))
                flist = glob.glob(os.path.join(ldda.dataset.extra_files_path, '*.*'))  # glob returns full paths
                for fpath in flist:
                    efp, fname = os.path.split(fpath)
                    if fname > '':
                        fname = fname.translate(trantab)
                    try:
                        if format == 'zip':
                            archive.add(fpath, fname)
                        else:
                            archive.add(fpath, fname, check_file=True)
                    except IOError:
                        log.exception("Unable to add %s to temporary library download archive %s", fname, outfname)
                        raise exceptions.InternalServerError("Unable to create archive for download.")
                    except ObjectNotFound:
                        log.exception("Requested dataset %s does not exist on the host.", fpath)
                        raise exceptions.ObjectNotFound("Requested dataset not found.")
                    except Exception as e:
                        log.exception("Unable to add %s to temporary library download archive %s", fname, outfname)
                        raise exceptions.InternalServerError("Unable to add dataset to temporary library download archive. " + util.unicodify(e))
            else:
                try:
                    if format == 'zip':
                        archive.add(ldda.dataset.file_name, path)
                    else:
                        archive.add(ldda.dataset.file_name, path, check_file=True)
                except IOError:
                    log.exception("Unable to write %s to temporary library download archive", ldda.dataset.file_name)
                    raise exceptions.InternalServerError("Unable to create archive for download")
                except ObjectNotFound:
                    log.exception("Requested dataset %s does not exist on the host.", ldda.dataset.file_name)
                    raise exceptions.ObjectNotFound("Requested dataset not found.")
                except Exception as e:
                    log.exception("Unable to add %s to temporary library download archive %s", ldda.dataset.file_name, outfname)
                    raise exceptions.InternalServerError("Unknown error. " + util.unicodify(e))
        lname = 'selected_dataset'
        fname = lname.replace(' ', '_') + '_files'
        if format == 'zip':
            archive.close()
            trans.response.set_content_type("application/octet-stream")
            trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (fname, outext)
            archive = util.streamball.ZipBall(tmpf, tmpd)
            archive.wsgi_status = trans.response.wsgi_status()
            archive.wsgi_headeritems = trans.response.wsgi_headeritems()
            return archive.stream
        else:
            trans.response.set_content_type("application/x-tar")
            trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (fname, outext)
            archive.wsgi_status = trans.response.wsgi_status()
            archive.wsgi_headeritems = trans.response.wsgi_headeritems()
            return archive.stream
    elif format == 'uncompressed':
        if len(library_datasets) != 1:
            raise exceptions.RequestParameterInvalidException("You can download only one uncompressed file at once.")
        else:
            single_ld = library_datasets[0]
            ldda = single_ld.library_dataset_dataset_association
            dataset = ldda.dataset
            fStat = os.stat(dataset.file_name)
            trans.response.set_content_type(ldda.get_mime())
            trans.response.headers['Content-Length'] = int(fStat.st_size)
            fname = "{path}.{extension}".format(path=ldda.name, extension=ldda.extension)
            fname = ''.join(c in util.FILENAME_VALID_CHARS and c or '_' for c in fname)[0:150]
            trans.response.headers["Content-Disposition"] = 'attachment; filename="%s"' % fname
            try:
                return open(dataset.file_name, 'rb')
            except Exception:
                raise exceptions.InternalServerError("This dataset contains no content.")
    else:
        raise exceptions.RequestParameterInvalidException("Wrong format parameter specified")
def _archive_composite_dataset(self, trans, data=None, **kwd):
    # save a composite object into a compressed archive for downloading
    params = util.Params(kwd)
    outfname = data.name[0:150]
    outfname = ''.join(c in FILENAME_VALID_CHARS and c or '_' for c in outfname)
    if params.do_action is None:
        params.do_action = 'zip'  # default
    msg = util.restore_text(params.get('msg', ''))
    if not data:
        msg = "You must select at least one dataset"
    else:
        error = False
        try:
            if params.do_action == 'zip':
                # Can't use mkstemp - the file must not exist first
                tmpd = tempfile.mkdtemp(dir=trans.app.config.new_file_path, prefix='gx_composite_archive_')
                util.umask_fix_perms(tmpd, trans.app.config.umask, 0o777, trans.app.config.gid)
                tmpf = os.path.join(tmpd, 'library_download.' + params.do_action)
                archive = zipfile.ZipFile(tmpf, 'w', zipfile.ZIP_DEFLATED, True)

                def zipfile_add(fpath, arcname):
                    encoded_arcname = arcname.encode('CP437')
                    try:
                        archive.write(fpath, encoded_arcname)
                    except TypeError:
                        # Despite documenting the need for CP437 encoded arcname,
                        # python 3 actually needs this to be a unicode string ...
                        # https://bugs.python.org/issue24110
                        archive.write(fpath, arcname)

                archive.add = zipfile_add
            elif params.do_action == 'tgz':
                archive = util.streamball.StreamBall('w|gz')
            elif params.do_action == 'tbz':
                archive = util.streamball.StreamBall('w|bz2')
        except (OSError, zipfile.BadZipFile):
            error = True
            log.exception("Unable to create archive for download")
            msg = "Unable to create archive for %s for download, please report this error" % outfname
        if not error:
            ext = data.extension
            path = data.file_name
            efp = data.extra_files_path
            # Add any central file to the archive,
            display_name = os.path.splitext(outfname)[0]
            if not display_name.endswith(ext):
                display_name = '%s_%s' % (display_name, ext)
            error, msg = self._archive_main_file(archive, display_name, path)[:2]
            if not error:
                # Add any child files to the archive,
                for fpath, rpath in self.__archive_extra_files_path(extra_files_path=efp):
                    try:
                        archive.add(fpath, rpath)
                    except IOError:
                        error = True
                        log.exception("Unable to add %s to temporary library download archive", rpath)
                        msg = "Unable to create archive for download, please report this error"
                        continue
            if not error:
                if params.do_action == 'zip':
                    archive.close()
                    tmpfh = open(tmpf, 'rb')
                    # CANNOT clean up - unlink/rmdir was always failing because file handle retained to return - must rely on a cron job to clean up tmp
                    trans.response.set_content_type("application/x-zip-compressed")
                    trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.zip"' % outfname
                    return tmpfh
                else:
                    trans.response.set_content_type("application/x-tar")
                    outext = 'tgz'
                    if params.do_action == 'tbz':
                        outext = 'tbz'
                    trans.response.headers["Content-Disposition"] = 'attachment; filename="%s.%s"' % (outfname, outext)
                    archive.wsgi_status = trans.response.wsgi_status()
                    archive.wsgi_headeritems = trans.response.wsgi_headeritems()
                    return archive.stream
    return trans.show_error_message(msg)
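# A sketch of the __archive_extra_files_path helper referenced above (its
# definition is not shown in this listing); inferred from the os.walk loop it
# replaced in the earlier versions of _archive_composite_dataset:
import os

def __archive_extra_files_path(self, extra_files_path):
    """Yield (absolute path, archive-relative path) pairs for every file
    under a dataset's extra_files_path."""
    for root, _, files in os.walk(extra_files_path):
        for fname in files:
            fpath = os.path.join(root, fname)
            yield fpath, os.path.relpath(fpath, extra_files_path)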
def finish( self, stdout, stderr ):
    """
    Called to indicate that the associated command has been run. Updates
    the output datasets based on stderr and stdout from the command, and
    the contents of the output files.
    """
    # default post job setup
    self.sa_session.expunge_all()
    job = self.get_job()
    try:
        self.reclaim_ownership()
    except Exception:
        self.fail( job.info )
        log.exception( '(%s) Failed to change ownership of %s, failing' % ( job.id, self.working_directory ) )
    # if the job was deleted, don't finish it
    if job.state == job.states.DELETED or job.state == job.states.ERROR:
        # ERROR at this point means the job was deleted by an administrator.
        return self.fail( job.info )
    if stderr:
        job.state = job.states.ERROR
    else:
        job.state = job.states.OK
    if self.version_string_cmd:
        version_filename = self.get_version_string_path()
        if os.path.exists( version_filename ):
            self.version_string = open( version_filename ).read()
            os.unlink( version_filename )
    if self.app.config.outputs_to_working_directory and not self.__link_file_check():
        for dataset_path in self.get_output_fnames():
            try:
                shutil.move( dataset_path.false_path, dataset_path.real_path )
                log.debug( "finish(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
            except ( IOError, OSError ):
                # this can happen if Galaxy is restarted during the job's
                # finish method - the false_path file has already moved,
                # and when the job is recovered, it won't be found.
                if os.path.exists( dataset_path.real_path ) and os.stat( dataset_path.real_path ).st_size > 0:
                    log.warning( "finish(): %s not found, but %s is not empty, so it will be used instead" % ( dataset_path.false_path, dataset_path.real_path ) )
                else:
                    return self.fail( "Job %s's output dataset(s) could not be read" % job.id )
    job_context = ExpressionContext( dict( stdout=stdout, stderr=stderr ) )
    job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )

    def in_directory( file, directory ):
        # Make both absolute.
        directory = os.path.abspath( directory )
        file = os.path.abspath( file )
        # Return true, if the common prefix of both is equal to directory
        # e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
        return os.path.commonprefix( [ file, directory ] ) == directory

    for dataset_assoc in job.output_datasets + job.output_library_datasets:
        context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
        # should this also be checking library associations? - can a library item be added from a history before the job has ended? -
        # lets not allow this to occur
        for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations:
            # need to update all associated output hdas, i.e. history was shared with job running
            #
            # If HDA is to be copied from the working directory, do it now so that other attributes are correctly set.
            #
            if isinstance( dataset, model.HistoryDatasetAssociation ):
                joda = self.sa_session.query( model.JobToOutputDatasetAssociation ).filter_by( job=job, dataset=dataset ).first()
                if joda and job_tool:
                    hda_tool_output = job_tool.outputs.get( joda.name, None )
                    if hda_tool_output and hda_tool_output.from_work_dir:
                        # Copy from working dir to HDA.
                        source_file = os.path.join( os.path.abspath( self.working_directory ), hda_tool_output.from_work_dir )
                        if in_directory( source_file, self.working_directory ):
                            try:
                                shutil.move( source_file, dataset.file_name )
                                log.debug( "finish(): Moved %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
                            except ( IOError, OSError ):
                                log.debug( "finish(): Could not move %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
                        else:
                            # Security violation.
                            log.exception( "from_work_dir specified a location not in the working directory: %s, %s" % ( source_file, self.working_directory ) )
            dataset.blurb = 'done'
            dataset.peek = 'no peek'
            dataset.info = ( dataset.info or '' ) + context['stdout'] + context['stderr']
            dataset.tool_version = self.version_string
            dataset.set_size()
            # Update (non-library) job output datasets through the object store
            if dataset not in job.output_library_datasets:
                self.app.object_store.update_from_file( dataset.dataset, create=True )
            if context['stderr']:
                dataset.blurb = "error"
            elif dataset.has_data():
                # If the tool was expected to set the extension, attempt to retrieve it
                if dataset.ext == 'auto':
                    dataset.extension = context.get( 'ext', 'data' )
                    dataset.init_meta( copy_from=dataset )
                # if a dataset was copied, it won't appear in our dictionary:
                # either use the metadata from originating output dataset, or call set_meta on the copies
                # it would be quicker to just copy the metadata from the originating output dataset,
                # but somewhat trickier (need to recurse up the copied_from tree), for now we'll call set_meta()
                if not self.app.config.set_metadata_externally or \
                        ( not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session )
                          and self.app.config.retry_metadata_internally ):
                    dataset.set_meta( overwrite=False )
                elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and not context['stderr']:
                    dataset._state = model.Dataset.states.FAILED_METADATA
                else:
                    # load metadata from file
                    # we need to no longer allow metadata to be edited while the job is still running,
                    # since if it is edited, the metadata changed on the running output will no longer match
                    # the metadata that was stored to disk for use via the external process,
                    # and the changes made by the user will be lost, without warning or notice
                    dataset.metadata.from_JSON_dict( self.external_output_metadata.get_output_filenames_by_dataset( dataset, self.sa_session ).filename_out )
                try:
                    assert context.get( 'line_count', None ) is not None
                    if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                        dataset.set_peek( line_count=context['line_count'], is_multi_byte=True )
                    else:
                        dataset.set_peek( line_count=context['line_count'] )
                except Exception:
                    if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                        dataset.set_peek( is_multi_byte=True )
                    else:
                        dataset.set_peek()
                try:
                    # set the name if provided by the tool
                    dataset.name = context['name']
                except Exception:
                    pass
            else:
                dataset.blurb = "empty"
                if dataset.ext == 'auto':
                    dataset.extension = 'txt'
            self.sa_session.add( dataset )
        if context['stderr']:
            dataset_assoc.dataset.dataset.state = model.Dataset.states.ERROR
        else:
            dataset_assoc.dataset.dataset.state = model.Dataset.states.OK
        # If any of the rest of the finish method below raises an
        # exception, the fail method will run and set the datasets to
        # ERROR. The user will never see that the datasets are in error if
        # they were flushed as OK here, since upon doing so, the history
        # panel stops checking for updates. So allow the
        # self.sa_session.flush() at the bottom of this method set
        # the state instead.
    for pja in job.post_job_actions:
        ActionBox.execute( self.app, self.sa_session, pja.post_job_action, job )
    # Flush all the dataset and job changes above. Dataset state changes
    # will now be seen by the user.
    self.sa_session.flush()
    # Save stdout and stderr
    if len( stdout ) > 32768:
        log.error( "stdout for job %d is greater than 32K, only first part will be logged to database" % job.id )
    job.stdout = stdout[:32768]
    if len( stderr ) > 32768:
        log.error( "stderr for job %d is greater than 32K, only first part will be logged to database" % job.id )
    job.stderr = stderr[:32768]
    # custom post process setup
    inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
    out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] )
    inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
    out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] )
    param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )  # why not re-use self.param_dict here?
    # dunno...probably should, this causes tools.parameters.basic.UnvalidatedValue to be used in following
    # methods instead of validated and transformed values during i.e. running workflows
    param_dict = self.tool.params_from_strings( param_dict, self.app )
    # Check for and move associated_files
    self.tool.collect_associated_files( out_data, self.working_directory )
    gitd = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
    if gitd:
        self.tool.collect_associated_files( { '': gitd }, self.working_directory )
    # Create generated output children and primary datasets and add to param_dict
    collected_datasets = {
        'children': self.tool.collect_child_datasets( out_data, self.working_directory ),
        'primary': self.tool.collect_primary_datasets( out_data, self.working_directory )
    }
    param_dict.update( { '__collected_datasets__': collected_datasets } )
    # Certain tools require tasks to be completed after job execution
    # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ).
    self.tool.exec_after_process( self.queue.app, inp_data, out_data, param_dict, job=job )
    # Call 'exec_after_process' hook
    self.tool.call_hook( 'exec_after_process', self.queue.app, inp_data=inp_data, out_data=out_data,
                         param_dict=param_dict, tool=self.tool, stdout=stdout, stderr=stderr )
    job.command_line = self.command_line

    bytes = 0
    # Once datasets are collected, set the total dataset size (includes extra files)
    for dataset_assoc in job.output_datasets:
        dataset_assoc.dataset.dataset.set_total_size()
        bytes += dataset_assoc.dataset.dataset.get_total_size()
    if job.user:
        job.user.total_disk_usage += bytes

    # fix permissions
    for path in [ dp.real_path for dp in self.get_output_fnames() ]:
        util.umask_fix_perms( path, self.app.config.umask, 0o666, self.app.config.gid )
    self.sa_session.flush()
    log.debug( 'job %d ended' % self.job_id )
    if self.app.config.cleanup_job == 'always' or ( not stderr and self.app.config.cleanup_job == 'onsuccess' ):
        self.cleanup()
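# The commonprefix-based in_directory() check in finish() above is purely
# string-based: with directory /a/b it also accepts the sibling /a/bc/file.
# A stricter variant (a sketch, not the code used above) resolves symlinks
# and compares whole path components:
import os

def in_directory_strict( file, directory ):
    directory = os.path.realpath( directory )
    file = os.path.realpath( file )
    # Require `file` to be the directory itself or separated from it by
    # os.sep, so /a/bc no longer matches directory /a/b.
    return file == directory or file.startswith( directory + os.sep )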