Exemple #1
0
def validate_server_directory_upload(trans, server_dir):
    if server_dir in [None, 'None', '']:
        raise RequestParameterInvalidException("Invalid or unspecified server_dir parameter")

    if trans.user_is_admin:
        import_dir = trans.app.config.library_import_dir
        import_dir_desc = 'library_import_dir'
        if not import_dir:
            raise ConfigDoesNotAllowException('"library_import_dir" is not set in the Galaxy configuration')
    else:
        import_dir = trans.app.config.user_library_import_dir
        if not import_dir:
            raise ConfigDoesNotAllowException('"user_library_import_dir" is not set in the Galaxy configuration')
        if server_dir != trans.user.email:
            import_dir = os.path.join(import_dir, trans.user.email)
        import_dir_desc = 'user_library_import_dir'

    full_dir = os.path.join(import_dir, server_dir)
    unsafe = None
    if safe_relpath(server_dir):
        username = trans.user.username if trans.app.config.user_library_import_check_permissions else None
        if import_dir_desc == 'user_library_import_dir' and safe_contains(import_dir, full_dir, whitelist=trans.app.config.user_library_import_symlink_whitelist):
            for unsafe in unsafe_walk(full_dir, whitelist=[import_dir] + trans.app.config.user_library_import_symlink_whitelist, username=username):
                log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', unsafe, os.path.realpath(unsafe))
    else:
        log.error('User attempted to import a directory path that resolves to a path outside of their import dir: %s -> %s', server_dir, os.path.realpath(full_dir))
        unsafe = True
    if unsafe:
        raise RequestParameterInvalidException("Invalid server_dir specified")

    return full_dir, import_dir_desc
Exemple #2
0
    def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenannigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning('alt_name would locate path outside dir: %s', alt_name)
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but S3 will not
            # follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)
        rel_path = os.path.join(*directory_hash_id(self._get_object_id(obj)))
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # for JOB_WORK directory
        if obj_dir:
            rel_path = os.path.join(rel_path, str(self._get_object_id(obj)))
        if base_dir:
            base = self.extra_dirs.get(base_dir)
            return os.path.join(base, rel_path)

        # S3 folders are marked by having trailing '/' so add it now
        rel_path = '%s/' % rel_path

        if not dir_only:
            rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % self._get_object_id(obj))
        return rel_path
Exemple #3
0
 def safemembers(self):
     members = self.archive
     if self.file_type == "tar":
         for finfo in members:
             if not safe_relpath(finfo.name):
                 raise Exception(finfo.name + " is blocked (illegal path).")
             elif (finfo.issym() or finfo.islnk()) and not safe_relpath(finfo.linkname):
                 raise Exception(finfo.name + " is blocked.")
             else:
                 yield finfo
     elif self.file_type == "zip":
         for name in members.namelist():
             if not safe_relpath(name):
                 raise Exception(name + " is blocked (illegal path).")
             else:
                 yield name
Exemple #4
0
    def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenannigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning('alt_name would locate path outside dir: %s', alt_name)
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but S3 will not
            # follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)
        rel_path = os.path.join(*directory_hash_id(obj.id))
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # for JOB_WORK directory
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        if base_dir:
            base = self.extra_dirs.get(base_dir)
            return os.path.join(base, rel_path)

        # S3 folders are marked by having trailing '/' so add it now
        rel_path = '%s/' % rel_path

        if not dir_only:
            rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
        return rel_path
Exemple #5
0
def validate_server_directory_upload(trans, server_dir):
    if server_dir in [None, 'None', '']:
        raise RequestParameterInvalidException("Invalid or unspecified server_dir parameter")

    if trans.user_is_admin():
        import_dir = trans.app.config.library_import_dir
        import_dir_desc = 'library_import_dir'
        if not import_dir:
            raise ConfigDoesNotAllowException('"library_import_dir" is not set in the Galaxy configuration')
    else:
        import_dir = trans.app.config.user_library_import_dir
        if not import_dir:
            raise ConfigDoesNotAllowException('"user_library_import_dir" is not set in the Galaxy configuration')
        if server_dir != trans.user.email:
            import_dir = os.path.join(import_dir, trans.user.email)
        import_dir_desc = 'user_library_import_dir'

    full_dir = os.path.join(import_dir, server_dir)
    unsafe = None
    if safe_relpath(server_dir):
        username = trans.user.username if trans.app.config.user_library_import_check_permissions else None
        if import_dir_desc == 'user_library_import_dir' and safe_contains(import_dir, full_dir, whitelist=trans.app.config.user_library_import_symlink_whitelist, username=username):
            for unsafe in unsafe_walk(full_dir, whitelist=[import_dir] + trans.app.config.user_library_import_symlink_whitelist):
                log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', unsafe, os.path.realpath(unsafe))
    else:
        log.error('User attempted to import a directory path that resolves to a path outside of their import dir: %s -> %s', server_dir, os.path.realpath(full_dir))
        unsafe = True
    if unsafe:
        raise RequestParameterInvalidException("Invalid server_dir specified")

    return full_dir, import_dir_desc
Exemple #6
0
    def __get_rods_path(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs):
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenannigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning('alt_name would locate path outside dir: %s', alt_name)
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but iRODS will
            # not follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)
        path = ""
        if extra_dir is not None:
            path = extra_dir

        # extra_dir_at_root is ignored - since the iRODS plugin does not use
        # the directory hash, there is only one level of subdirectory.

        if not dir_only:
            # the .dat extension is stripped when stored in iRODS
            # TODO: is the strip_dat kwarg the best way to implement this?
            if strip_dat and alt_name and alt_name.endswith('.dat'):
                alt_name = os.path.splitext(alt_name)[0]
            default_name = 'dataset_%s' % obj.id
            if not strip_dat:
                default_name += '.dat'
            path = path_join(path, alt_name if alt_name else default_name)

        path = path_join(self.root_collection_path, path)
        return path
Exemple #7
0
 def safemembers(self):
     members = self.archive
     if self.file_type == "tar":
         for finfo in members:
             if not safe_relpath(finfo.name):
                 raise Exception(finfo.name + " is blocked (illegal path).")
             elif (finfo.issym()
                   or finfo.islnk()) and not safe_relpath(finfo.linkname):
                 raise Exception(finfo.name + " is blocked.")
             else:
                 yield finfo
     elif self.file_type == "zip":
         for name in members.namelist():
             if not safe_relpath(name):
                 raise Exception(name + " is blocked (illegal path).")
             else:
                 yield name
Exemple #8
0
 def safemembers(self):
     members = self.archive
     common_prefix_dir = self.common_prefix_dir
     if self.file_type == "tar":
         for finfo in members:
             if not safe_relpath(finfo.name):
                 raise Exception(f"Path '{finfo.name}' is blocked (illegal path).")
             if finfo.issym() or finfo.islnk():
                 link_target = os.path.join(os.path.dirname(finfo.name), finfo.linkname)
                 if not safe_relpath(link_target) or not os.path.normpath(link_target).startswith(common_prefix_dir):
                     raise Exception(f"Link '{finfo.name}' to '{finfo.linkname}' is blocked.")
             yield finfo
     elif self.file_type == "zip":
         for name in members.namelist():
             if not safe_relpath(name):
                 raise Exception(f"{name} is blocked (illegal path).")
             yield name
Exemple #9
0
def check_archive(repository, archive):
    valid = []
    invalid = []
    errors = []
    undesirable_files = []
    undesirable_dirs = []
    for member in archive.getmembers():
        # Allow regular files and directories only
        if not (member.isdir() or member.isfile() or member.islnk()):
            errors.append(
                "Uploaded archives can only include regular directories and files (no symbolic links, devices, etc)."
            )
            invalid.append(member)
            continue
        if not safe_relpath(member.name):
            errors.append(
                "Uploaded archives cannot contain files that would extract outside of the archive."
            )
            invalid.append(member)
            continue
        if os.path.basename(member.name) in UNDESIRABLE_FILES:
            undesirable_files.append(member)
            continue
        head = tail = member.name
        found_undesirable_dir = False
        while tail:
            head, tail = os.path.split(head)
            if tail in UNDESIRABLE_DIRS:
                undesirable_dirs.append(member)
                found_undesirable_dir = True
                break
        if found_undesirable_dir:
            continue
        if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and member.name != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
            errors.append(
                'Repositories of type <b>Repository suite definition</b> can contain only a single file named <b>repository_dependencies.xml</b>.'
            )
            invalid.append(member)
            continue
        if repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and member.name != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME:
            errors.append(
                'Repositories of type <b>Tool dependency definition</b> can contain only a single file named <b>tool_dependencies.xml</b>.'
            )
            invalid.append(member)
            continue
        valid.append(member)
    ArchiveCheckResults = namedtuple('ArchiveCheckResults', [
        'valid', 'invalid', 'undesirable_files', 'undesirable_dirs', 'errors'
    ])
    return ArchiveCheckResults(valid, invalid, undesirable_files,
                               undesirable_dirs, errors)
    def _construct_path(self,
                        obj,
                        base_dir=None,
                        dir_only=None,
                        extra_dir=None,
                        extra_dir_at_root=False,
                        alt_name=None,
                        obj_dir=False,
                        **kwargs):
        """Construct path from object and parameters"""
        # param extra_dir: should never be constructed from provided data but
        # just make sure there are no shenannigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: {0}'.format(extra_dir))
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning(
                    'alt_name would locate path outside dir: {0}'.format(
                        alt_name))
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but S3 will not
            # follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)
        rel_path = os.path.join(*directory_hash_id(obj.id))
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)

        # for JOB_WORK directory
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        if base_dir:
            base = self.extra_dirs.get(base_dir)
            return os.path.join(base, rel_path)

        # Pithos+ folders are marked by having trailing '/' so add it now
        rel_path = '{0}/'.format(rel_path)

        if not dir_only:
            an = alt_name if alt_name else 'dataset_{0}.dat'.format(obj.id)
            rel_path = os.path.join(rel_path, an)
        return rel_path
Exemple #11
0
    def __get_rods_path(self,
                        obj,
                        base_dir=None,
                        dir_only=False,
                        extra_dir=None,
                        extra_dir_at_root=False,
                        alt_name=None,
                        strip_dat=True,
                        **kwargs):
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenannigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name:
            if not safe_relpath(alt_name):
                log.warning('alt_name would locate path outside dir: %s',
                            alt_name)
                raise ObjectInvalid("The requested object is invalid")
            # alt_name can contain parent directory references, but iRODS will
            # not follow them, so if they are valid we normalize them out
            alt_name = os.path.normpath(alt_name)
        path = ""
        if extra_dir is not None:
            path = extra_dir

        # extra_dir_at_root is ignored - since the iRODS plugin does not use
        # the directory hash, there is only one level of subdirectory.

        if not dir_only:
            # the .dat extension is stripped when stored in iRODS
            # TODO: is the strip_dat kwarg the best way to implement this?
            if strip_dat and alt_name and alt_name.endswith('.dat'):
                alt_name = os.path.splitext(alt_name)[0]
            default_name = 'dataset_%s' % obj.id
            if not strip_dat:
                default_name += '.dat'
            path = path_join(path, alt_name if alt_name else default_name)

        path = path_join(self.root_collection_path, path)
        return path
def check_archive(repository, archive):
    valid = []
    invalid = []
    errors = []
    undesirable_files = []
    undesirable_dirs = []
    for member in archive.getmembers():
        # Allow regular files and directories only
        if not (member.isdir() or member.isfile() or member.islnk()):
            errors.append("Uploaded archives can only include regular directories and files (no symbolic links, devices, etc).")
            invalid.append(member)
            continue
        if not safe_relpath(member.name):
            errors.append("Uploaded archives cannot contain files that would extract outside of the archive.")
            invalid.append(member)
            continue
        if os.path.basename(member.name) in UNDESIRABLE_FILES:
            undesirable_files.append(member)
            continue
        head = tail = member.name
        try:
            while tail:
                head, tail = os.path.split(head)
                if tail in UNDESIRABLE_DIRS:
                    undesirable_dirs.append(member)
                    assert False
        except AssertionError:
            continue
        if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and member.name != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
            errors.append('Repositories of type <b>Repository suite definition</b> can contain only a single file named <b>repository_dependencies.xml</b>.')
            invalid.append(member)
            continue
        if repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and member.name != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME:
            errors.append('Repositories of type <b>Tool dependency definition</b> can contain only a single file named <b>tool_dependencies.xml</b>.')
            invalid.append(member)
            continue
        valid.append(member)
    ArchiveCheckResults = namedtuple('ArchiveCheckResults', ['valid', 'invalid', 'undesirable_files', 'undesirable_dirs', 'errors'])
    return ArchiveCheckResults(valid, invalid, undesirable_files, undesirable_dirs, errors)
Exemple #13
0
    def _construct_path(self,
                        obj,
                        old_style=False,
                        base_dir=None,
                        dir_only=False,
                        extra_dir=None,
                        extra_dir_at_root=False,
                        alt_name=None,
                        obj_dir=False,
                        **kwargs):
        """
        Construct the absolute path for accessing the object identified by `obj.id`.

        :type base_dir: string
        :param base_dir: A key in self.extra_dirs corresponding to the base
                         directory in which this object should be created, or
                         None to specify the default directory.

        :type dir_only: boolean
        :param dir_only: If True, check only the path where the file
                         identified by `obj` should be located, not the
                         dataset itself. This option applies to `extra_dir`
                         argument as well.

        :type extra_dir: string
        :param extra_dir: Append the value of this parameter to the expected
            path used to access the object identified by `obj` (e.g.,
            /files/000/<extra_dir>/dataset_10.dat).

        :type alt_name: string
        :param alt_name: Use this name as the alternative name for the returned
                         dataset rather than the default.

        :type old_style: boolean
        param old_style: This option is used for backward compatibility. If
            `True` then the composed directory structure does not include a
            hash id (e.g., /files/dataset_10.dat (old) vs.
            /files/000/dataset_10.dat (new))
        """
        base = os.path.abspath(self.extra_dirs.get(base_dir, self.file_path))
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenannigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name and not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        obj_id = self._get_object_id(obj)
        if old_style:
            if extra_dir is not None:
                path = os.path.join(base, extra_dir)
            else:
                path = base
        else:
            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj_id))
            # Create a subdirectory for the object ID
            if obj_dir:
                rel_path = os.path.join(rel_path, str(obj_id))
            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)
            path = os.path.join(base, rel_path)
        if not dir_only:
            assert obj_id is not None, "The effective dataset identifier consumed by object store [%s] must be set before a path can be constructed." % (
                self.store_by)
            path = os.path.join(
                path, alt_name if alt_name else "dataset_%s.dat" % obj_id)
        return os.path.abspath(path)
Exemple #14
0
    def load(self, trans, payload=None, **kwd):
        """
        Load dataset(s) from the given source into the library.

        * POST /api/libraries/datasets

        :param   payload: dictionary structure containing:
            :param  encoded_folder_id:      the encoded id of the folder to import dataset(s) to
            :type   encoded_folder_id:      an encoded id string
            :param  source:                 source the datasets should be loaded from
                    Source can be:
                        user directory - root folder specified in galaxy.ini as "$user_library_import_dir"
                            example path: path/to/galaxy/$user_library_import_dir/[email protected]/{user can browse everything here}
                            the folder with the user login has to be created beforehand
                        (admin)import directory - root folder specified in galaxy ini as "$library_import_dir"
                            example path: path/to/galaxy/$library_import_dir/{admin can browse everything here}
                        (admin)any absolute or relative path - option allowed with "allow_library_path_paste" in galaxy.ini
            :type   source:                 str
            :param  link_data:              flag whether to link the dataset to data or copy it to Galaxy, defaults to copy
                                            while linking is set to True all symlinks will be resolved _once_
            :type   link_data:              bool
            :param  preserve_dirs:          flag whether to preserve the directory structure when importing dir
                                            if False only datasets will be imported
            :type   preserve_dirs:          bool
            :param  file_type:              file type of the loaded datasets, defaults to 'auto' (autodetect)
            :type   file_type:              str
            :param  dbkey:                  dbkey of the loaded genome, defaults to '?' (unknown)
            :type   dbkey:                  str
            :param  tag_using_filenames:    flag whether to generate dataset tags from filenames
            :type   tag_using_filenames:    bool
        :type   dictionary

        :returns:   dict containing information about the created upload job
        :rtype:     dictionary

        :raises: RequestParameterMissingException, AdminRequiredException, ConfigDoesNotAllowException, RequestParameterInvalidException
                    InsufficientPermissionsException, ObjectNotFound
        """
        if payload:
            kwd.update(payload)
        kwd['space_to_tab'] = False
        kwd['to_posix_lines'] = True
        kwd['dbkey'] = kwd.get('dbkey', '?')
        kwd['file_type'] = kwd.get('file_type', 'auto')
        kwd['link_data_only'] = 'link_to_files' if util.string_as_bool(kwd.get('link_data', False)) else 'copy_files'
        kwd['tag_using_filenames'] = util.string_as_bool(kwd.get('tag_using_filenames', None))
        encoded_folder_id = kwd.get('encoded_folder_id', None)
        if encoded_folder_id is not None:
            folder_id = self.folder_manager.cut_and_decode(trans, encoded_folder_id)
        else:
            raise exceptions.RequestParameterMissingException('The required attribute encoded_folder_id is missing.')
        path = kwd.get('path', None)
        if path is None:
            raise exceptions.RequestParameterMissingException('The required attribute path is missing.')
        folder = self.folder_manager.get(trans, folder_id)

        source = kwd.get('source', None)
        if source not in ['userdir_file', 'userdir_folder', 'importdir_file', 'importdir_folder', 'admin_path']:
            raise exceptions.RequestParameterMissingException('You have to specify "source" parameter. Possible values are "userdir_file", "userdir_folder", "admin_path", "importdir_file" and "importdir_folder". ')
        elif source in ['importdir_file', 'importdir_folder']:
            if not trans.user_is_admin():
                raise exceptions.AdminRequiredException('Only admins can import from importdir.')
            if not trans.app.config.library_import_dir:
                raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow admins to import into library from importdir.')
            import_base_dir = trans.app.config.library_import_dir
            if not safe_relpath(path):
                # admins shouldn't be able to explicitly specify a path outside server_dir, but symlinks are allowed.
                # the reasoning here is that galaxy admins may not have direct filesystem access or can only access
                # library_import_dir via FTP (which cannot create symlinks), and may rely on sysadmins to set up the
                # import directory. if they have filesystem access, all bets are off.
                raise exceptions.RequestParameterInvalidException('The given path is invalid.')
            path = os.path.join(import_base_dir, path)
        elif source in ['userdir_file', 'userdir_folder']:
            unsafe = None
            user_login = trans.user.email
            user_base_dir = trans.app.config.user_library_import_dir
            if user_base_dir is None:
                raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow upload from user directories.')
            full_dir = os.path.join(user_base_dir, user_login)
            if not safe_contains(full_dir, path, whitelist=trans.app.config.user_library_import_symlink_whitelist):
                # the path is a symlink outside the user dir
                path = os.path.join(full_dir, path)
                log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', path, os.path.realpath(path))
                raise exceptions.RequestParameterInvalidException('The given path is invalid.')
            path = os.path.join(full_dir, path)
            for unsafe in unsafe_walk(path, whitelist=[full_dir] + trans.app.config.user_library_import_symlink_whitelist):
                # the path is a dir and contains files that symlink outside the user dir
                log.error('User attempted to import a directory containing a path that resolves to a path outside of their import dir: %s -> %s', unsafe, os.path.realpath(unsafe))
            if unsafe:
                raise exceptions.RequestParameterInvalidException('The given path is invalid.')
            if not os.path.exists(path):
                raise exceptions.RequestParameterInvalidException('Given path does not exist on the host.')
            if not self.folder_manager.can_add_item(trans, folder):
                raise exceptions.InsufficientPermissionsException('You do not have proper permission to add items to the given folder.')
        elif source == 'admin_path':
            if not trans.app.config.allow_library_path_paste:
                raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow admins to import into library from path.')
            if not trans.user_is_admin():
                raise exceptions.AdminRequiredException('Only admins can import from path.')

        # Set up the traditional tool state/params
        tool_id = 'upload1'
        tool = trans.app.toolbox.get_tool(tool_id)
        state = tool.new_state(trans)
        populate_state(trans, tool.inputs, kwd, state.inputs)
        tool_params = state.inputs
        dataset_upload_inputs = []
        for input in tool.inputs.itervalues():
            if input.type == "upload_dataset":
                dataset_upload_inputs.append(input)
        library_bunch = upload_common.handle_library_params(trans, {}, trans.security.encode_id(folder.id))
        abspath_datasets = []
        kwd['filesystem_paths'] = path
        if source in ['importdir_folder']:
            kwd['filesystem_paths'] = os.path.join(import_base_dir, path)
        # user wants to import one file only
        elif source in ["userdir_file", "importdir_file"]:
            file = os.path.abspath(path)
            abspath_datasets.append(trans.webapp.controllers['library_common'].make_library_uploaded_dataset(
                trans, 'api', kwd, os.path.basename(file), file, 'server_dir', library_bunch))
        # user wants to import whole folder
        elif source == "userdir_folder":
            uploaded_datasets_bunch = trans.webapp.controllers['library_common'].get_path_paste_uploaded_datasets(
                trans, 'api', kwd, library_bunch, 200, '')
            uploaded_datasets = uploaded_datasets_bunch[0]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound('Given folder does not contain any datasets.')
            for ud in uploaded_datasets:
                ud.path = os.path.abspath(ud.path)
                abspath_datasets.append(ud)
        #  user wants to import from path
        if source in ["admin_path", "importdir_folder"]:
            # validate the path is within root
            uploaded_datasets_bunch = trans.webapp.controllers['library_common'].get_path_paste_uploaded_datasets(
                trans, 'api', kwd, library_bunch, 200, '')
            uploaded_datasets = uploaded_datasets_bunch[0]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound('Given folder does not contain any datasets.')
            for ud in uploaded_datasets:
                ud.path = os.path.abspath(ud.path)
                abspath_datasets.append(ud)
        json_file_path = upload_common.create_paramfile(trans, abspath_datasets)
        data_list = [ud.data for ud in abspath_datasets]
        job_params = {}
        job_params['link_data_only'] = dumps(kwd.get('link_data_only', 'copy_files'))
        job_params['uuid'] = dumps(kwd.get('uuid', None))
        job, output = upload_common.create_job(trans, tool_params, tool, json_file_path, data_list, folder=folder, job_params=job_params)
        trans.sa_session.add(job)
        trans.sa_session.flush()
        job_dict = job.to_dict()
        job_dict['id'] = trans.security.encode_id(job_dict['id'])
        return job_dict
Exemple #15
0
    def load(self, trans, payload=None, **kwd):
        """
        POST /api/libraries/datasets

        Load dataset(s) from the given source into the library.

        :param   payload: dictionary structure containing:
            :param  encoded_folder_id:      the encoded id of the folder to import dataset(s) to
            :type   encoded_folder_id:      an encoded id string
            :param  source:

                source the datasets should be loaded from. Source can be:

                    - user directory

                        root folder specified in galaxy.ini as "$user_library_import_dir"
                        example path: path/to/galaxy/$user_library_import_dir/[email protected]/{user can browse everything here}
                        the folder with the user login has to be created beforehand

                    - (admin)import directory

                        root folder specified in galaxy ini as "$library_import_dir"
                        example path: path/to/galaxy/$library_import_dir/{admin can browse everything here}

                    - (admin)any absolute or relative path

                        option allowed with "allow_library_path_paste" in galaxy.ini

            :type   source:                 str
            :param  link_data:

                flag whether to link the dataset to data or copy it to Galaxy, defaults to copy
                while linking is set to True all symlinks will be resolved _once_

            :type   link_data:              bool
            :param  preserve_dirs:

                flag whether to preserve the directory structure when importing dir
                if False only datasets will be imported

            :type   preserve_dirs:          bool
            :param  file_type:              file type of the loaded datasets, defaults to 'auto' (autodetect)
            :type   file_type:              str
            :param  dbkey:                  dbkey of the loaded genome, defaults to '?' (unknown)
            :type   dbkey:                  str
            :param  tag_using_filenames:    flag whether to generate dataset tags from filenames
            :type   tag_using_filenames:    bool

        :type   dictionary

        :returns:   dict containing information about the created upload job
        :rtype:     dictionary

        :raises: RequestParameterMissingException, AdminRequiredException, ConfigDoesNotAllowException, RequestParameterInvalidException
                    InsufficientPermissionsException, ObjectNotFound
        """
        if payload:
            kwd.update(payload)
        kwd['space_to_tab'] = False
        kwd['to_posix_lines'] = True
        kwd['dbkey'] = kwd.get('dbkey', '?')
        kwd['file_type'] = kwd.get('file_type', 'auto')
        kwd['link_data_only'] = 'link_to_files' if util.string_as_bool(kwd.get('link_data', False)) else 'copy_files'
        kwd['tag_using_filenames'] = util.string_as_bool(kwd.get('tag_using_filenames', None))
        encoded_folder_id = kwd.get('encoded_folder_id', None)
        if encoded_folder_id is not None:
            folder_id = self.folder_manager.cut_and_decode(trans, encoded_folder_id)
        else:
            raise exceptions.RequestParameterMissingException('The required attribute encoded_folder_id is missing.')
        path = kwd.get('path', None)
        if path is None:
            raise exceptions.RequestParameterMissingException('The required attribute path is missing.')
        if not isinstance(path, str):
            raise exceptions.RequestParameterInvalidException('The required attribute path is not String.')

        folder = self.folder_manager.get(trans, folder_id)

        source = kwd.get('source', None)
        if source not in ['userdir_file', 'userdir_folder', 'importdir_file', 'importdir_folder', 'admin_path']:
            raise exceptions.RequestParameterMissingException('You have to specify "source" parameter. Possible values are "userdir_file", "userdir_folder", "admin_path", "importdir_file" and "importdir_folder". ')
        elif source in ['importdir_file', 'importdir_folder']:
            if not trans.user_is_admin:
                raise exceptions.AdminRequiredException('Only admins can import from importdir.')
            if not trans.app.config.library_import_dir:
                raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow admins to import into library from importdir.')
            import_base_dir = trans.app.config.library_import_dir
            if not safe_relpath(path):
                # admins shouldn't be able to explicitly specify a path outside server_dir, but symlinks are allowed.
                # the reasoning here is that galaxy admins may not have direct filesystem access or can only access
                # library_import_dir via FTP (which cannot create symlinks), and may rely on sysadmins to set up the
                # import directory. if they have filesystem access, all bets are off.
                raise exceptions.RequestParameterInvalidException('The given path is invalid.')
            path = os.path.join(import_base_dir, path)
        elif source in ['userdir_file', 'userdir_folder']:
            username = trans.user.username if trans.app.config.user_library_import_check_permissions else None
            user_login = trans.user.email
            user_base_dir = trans.app.config.user_library_import_dir
            if user_base_dir is None:
                raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow upload from user directories.')
            full_dir = os.path.join(user_base_dir, user_login)

            if not safe_contains(full_dir, path, allowlist=trans.app.config.user_library_import_symlink_allowlist):
                # the path is a symlink outside the user dir
                path = os.path.join(full_dir, path)
                log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', path, os.path.realpath(path))
                raise exceptions.RequestParameterInvalidException('The given path is invalid.')
            if trans.app.config.user_library_import_check_permissions and not full_path_permission_for_user(full_dir, path, username):
                log.error('User attempted to import a path that resolves to a path outside of their import dir: '
                        '%s -> %s and cannot be read by them.', path, os.path.realpath(path))
                raise exceptions.RequestParameterInvalidException('The given path is invalid.')
            path = os.path.join(full_dir, path)
            if unsafe_walk(path, allowlist=[full_dir] + trans.app.config.user_library_import_symlink_allowlist, username=username):
                # the path is a dir and contains files that symlink outside the user dir
                error = 'User attempted to import a path that resolves to a path outside of their import dir: {} -> {}'.format(
                    path, os.path.realpath(path)
                )
                if trans.app.config.user_library_import_check_permissions:
                    error += ' or is not readable for them.'
                log.error(error)
                raise exceptions.RequestParameterInvalidException('The given path is invalid.')
            if not os.path.exists(path):
                raise exceptions.RequestParameterInvalidException('Given path does not exist on the host.')
            if not self.folder_manager.can_add_item(trans, folder):
                raise exceptions.InsufficientPermissionsException('You do not have proper permission to add items to the given folder.')
        elif source == 'admin_path':
            if not trans.app.config.allow_library_path_paste:
                raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow admins to import into library from path.')
            if not trans.user_is_admin:
                raise exceptions.AdminRequiredException('Only admins can import from path.')

        # Set up the traditional tool state/params
        tool_id = 'upload1'
        tool = trans.app.toolbox.get_tool(tool_id)
        state = tool.new_state(trans)
        populate_state(trans, tool.inputs, kwd, state.inputs)
        tool_params = state.inputs
        dataset_upload_inputs = []
        for input in tool.inputs.values():
            if input.type == "upload_dataset":
                dataset_upload_inputs.append(input)
        library_bunch = upload_common.handle_library_params(trans, {}, trans.security.encode_id(folder.id))
        abspath_datasets = []
        kwd['filesystem_paths'] = path
        if source in ['importdir_folder']:
            kwd['filesystem_paths'] = os.path.join(import_base_dir, path)
        # user wants to import one file only
        elif source in ["userdir_file", "importdir_file"]:
            file = os.path.abspath(path)
            abspath_datasets.append(self._make_library_uploaded_dataset(
                trans, kwd, os.path.basename(file), file, 'server_dir', library_bunch))
        # user wants to import whole folder
        elif source == "userdir_folder":
            uploaded_datasets_bunch = self._get_path_paste_uploaded_datasets(
                trans, kwd, library_bunch, 200, '')
            uploaded_datasets = uploaded_datasets_bunch[0]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound('Given folder does not contain any datasets.')
            for ud in uploaded_datasets:
                ud.path = os.path.abspath(ud.path)
                abspath_datasets.append(ud)
        #  user wants to import from path
        if source in ["admin_path", "importdir_folder"]:
            # validate the path is within root
            uploaded_datasets_bunch = self._get_path_paste_uploaded_datasets(
                trans, kwd, library_bunch, 200, '')
            uploaded_datasets = uploaded_datasets_bunch[0]
            if uploaded_datasets is None:
                raise exceptions.ObjectNotFound('Given folder does not contain any datasets.')
            for ud in uploaded_datasets:
                ud.path = os.path.abspath(ud.path)
                abspath_datasets.append(ud)
        json_file_path = upload_common.create_paramfile(trans, abspath_datasets)
        data_list = [ud.data for ud in abspath_datasets]
        job_params = {}
        job_params['link_data_only'] = dumps(kwd.get('link_data_only', 'copy_files'))
        job_params['uuid'] = dumps(kwd.get('uuid', None))
        job, output = upload_common.create_job(trans, tool_params, tool, json_file_path, data_list, folder=folder, job_params=job_params)
        trans.app.job_manager.enqueue(job, tool=tool)
        job_dict = job.to_dict()
        job_dict['id'] = trans.security.encode_id(job_dict['id'])
        return job_dict
Exemple #16
0
    def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
        """
        Construct the absolute path for accessing the object identified by `obj.id`.

        :type base_dir: string
        :param base_dir: A key in self.extra_dirs corresponding to the base
                         directory in which this object should be created, or
                         None to specify the default directory.

        :type dir_only: boolean
        :param dir_only: If True, check only the path where the file
                         identified by `obj` should be located, not the
                         dataset itself. This option applies to `extra_dir`
                         argument as well.

        :type extra_dir: string
        :param extra_dir: Append the value of this parameter to the expected
            path used to access the object identified by `obj` (e.g.,
            /files/000/<extra_dir>/dataset_10.dat).

        :type alt_name: string
        :param alt_name: Use this name as the alternative name for the returned
                         dataset rather than the default.

        :type old_style: boolean
        param old_style: This option is used for backward compatibility. If
            `True` then the composed directory structure does not include a
            hash id (e.g., /files/dataset_10.dat (old) vs.
            /files/000/dataset_10.dat (new))
        """
        base = os.path.abspath(self.extra_dirs.get(base_dir, self.file_path))
        # extra_dir should never be constructed from provided data but just
        # make sure there are no shenannigans afoot
        if extra_dir and extra_dir != os.path.normpath(extra_dir):
            log.warning('extra_dir is not normalized: %s', extra_dir)
            raise ObjectInvalid("The requested object is invalid")
        # ensure that any parent directory references in alt_name would not
        # result in a path not contained in the directory path constructed here
        if alt_name and not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        if old_style:
            if extra_dir is not None:
                path = os.path.join(base, extra_dir)
            else:
                path = base
        else:
            # Construct hashed path
            rel_path = os.path.join(*directory_hash_id(obj.id))
            # Create a subdirectory for the object ID
            if obj_dir:
                rel_path = os.path.join(rel_path, str(obj.id))
            # Optionally append extra_dir
            if extra_dir is not None:
                if extra_dir_at_root:
                    rel_path = os.path.join(extra_dir, rel_path)
                else:
                    rel_path = os.path.join(rel_path, extra_dir)
            path = os.path.join(base, rel_path)
        if not dir_only:
            path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
        return os.path.abspath(path)