Beispiel #1
0
    def __get_rods_path(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs):
        """
        Build the iRODS path for ``obj`` below ``self.root_collection_path``.

        The result is ``<root>/<extra_dir>/<name>`` where ``<name>`` is
        ``alt_name`` if given, otherwise ``dataset_<obj.id>``.  With
        ``dir_only`` the name component is left out.  With ``strip_dat``
        (the default) a trailing ``.dat`` is removed from ``alt_name`` and
        not appended to the default name.

        ``base_dir``, ``extra_dir_at_root`` and any additional keyword
        arguments are accepted but not used here.

        Raises ``ObjectInvalid`` if ``extra_dir`` is not normalized or if
        ``alt_name`` is rejected by ``safe_relpath``.
        """
        # extra_dir is not expected to be built from user-provided data;
        # refuse anything that is not already in normalized form anyway.
        if extra_dir:
            if extra_dir != os.path.normpath(extra_dir):
                log.warning('extra_dir is not normalized: %s', extra_dir)
                raise ObjectInvalid("The requested object is invalid")

        # alt_name must not point outside the directory constructed here.
        # Valid parent-directory references are normalized away because
        # iRODS will not follow them.
        if alt_name:
            if safe_relpath(alt_name):
                alt_name = os.path.normpath(alt_name)
            else:
                log.warning('alt_name would locate path outside dir: %s', alt_name)
                raise ObjectInvalid("The requested object is invalid")

        # extra_dir_at_root is ignored - the iRODS plugin does not use the
        # directory hash, so there is only one level of subdirectory.
        rel_path = "" if extra_dir is None else extra_dir

        if not dir_only:
            # the .dat extension is stripped when stored in iRODS
            if strip_dat and alt_name and alt_name.endswith('.dat'):
                alt_name = os.path.splitext(alt_name)[0]
            fallback = 'dataset_%s' % obj.id
            if not strip_dat:
                fallback += '.dat'
            rel_path = path_join(rel_path, alt_name or fallback)

        return path_join(self.root_collection_path, rel_path)
Beispiel #2
0
    def __init__( self, config, file_path=None, extra_dirs=None ):
        """
        Connect to iRODS and make sure the root collection for this instance exists.

        :param config: configuration object; ``object_store_cache_path``,
            ``irods_default_resource`` and ``irods_root_collection_path`` are
            read from it.
        :param file_path: passed through unchanged to the parent constructor.
        :param extra_dirs: passed through unchanged to the parent constructor.
        :raises AssertionError: if the ``irods`` module is unavailable, if no
            absolute root collection path can be determined, or if the root
            collection does not exist and cannot be created.
        """
        super( IRODSObjectStore, self ).__init__( config, file_path=file_path, extra_dirs=extra_dirs )
        # Failures are raised as explicit AssertionErrors instead of ``assert``
        # statements: asserts are removed under ``python -O``, which would
        # skip these checks and even the createCollection() call below.
        if irods is None:
            raise AssertionError( IRODS_IMPORT_MESSAGE )
        self.cache_path = config.object_store_cache_path
        self.default_resource = config.irods_default_resource or None

        # Connect to iRODS (AssertionErrors will be raised if anything goes wrong)
        self.rods_env, self.rods_conn = rods_connect()

        # An absolute configured path is used as is; an unset or relative one
        # is resolved against the iRODS home collection.
        configured_path = config.irods_root_collection_path
        if configured_path is not None and configured_path.startswith( '/' ):
            self.root_collection_path = configured_path
        else:
            rods_home = self.rods_env.rodsHome
            if rods_home == '':
                raise AssertionError( "Unable to initialize iRODS Object Store: rodsHome cannot be determined and irods_root_collection_path in Galaxy config is unset or not absolute." )
            if configured_path is None:
                self.root_collection_path = path_join( rods_home, 'galaxy_data' )
            else:
                self.root_collection_path = path_join( rods_home, configured_path )

        # will return a collection object regardless of whether it exists
        self.root_collection = irods.irodsCollection( self.rods_conn, self.root_collection_path )

        if self.root_collection.getId() == -1:
            log.warning( "iRODS root collection does not exist, will attempt to create: %s", self.root_collection_path )
            # Move to the parent collection and create the last path component there.
            self.root_collection.upCollection()
            status = self.root_collection.createCollection( os.path.basename( self.root_collection_path ) )
            if status != 0:
                raise AssertionError( "iRODS root collection creation failed: %s" % self.root_collection_path )
            # Re-open the collection to verify that it really exists now.
            self.root_collection = irods.irodsCollection( self.rods_conn, self.root_collection_path )
            if self.root_collection.getId() == -1:
                raise AssertionError( "iRODS root collection creation claimed success but still does not exist" )

        if self.default_resource is None:
            self.default_resource = self.rods_env.rodsDefResource

        log.info( "iRODS data for this instance will be stored in collection: %s, resource: %s", self.root_collection_path, self.default_resource )
Beispiel #3
0
    def __init__( self, config, file_path=None, extra_dirs=None ):
        """
        Connect to iRODS and make sure the root collection for this instance exists.

        :param config: configuration object; ``object_store_cache_path``,
            ``irods_default_resource`` and ``irods_root_collection_path`` are
            read from it.
        :param file_path: passed through unchanged to the parent constructor.
        :param extra_dirs: passed through unchanged to the parent constructor.
        :raises AssertionError: if the ``irods`` module is unavailable, if no
            absolute root collection path can be determined, or if the root
            collection does not exist and cannot be created.
        """
        super( IRODSObjectStore, self ).__init__( config, file_path=file_path, extra_dirs=extra_dirs )
        # Failures are raised as explicit AssertionErrors instead of ``assert``
        # statements: asserts are removed under ``python -O``, which would
        # skip these checks and even the createCollection() call below.
        if irods is None:
            raise AssertionError( IRODS_IMPORT_MESSAGE )
        self.cache_path = config.object_store_cache_path
        self.default_resource = config.irods_default_resource or None

        # Connect to iRODS (AssertionErrors will be raised if anything goes wrong)
        self.rods_env, self.rods_conn = rods_connect()

        # An absolute configured path is used as is; an unset or relative one
        # is resolved against the iRODS home collection.
        configured_path = config.irods_root_collection_path
        if configured_path is not None and configured_path.startswith( '/' ):
            self.root_collection_path = configured_path
        else:
            rods_home = self.rods_env.rodsHome
            if rods_home == '':
                raise AssertionError( "Unable to initialize iRODS Object Store: rodsHome cannot be determined and irods_root_collection_path in Galaxy config is unset or not absolute." )
            if configured_path is None:
                self.root_collection_path = path_join( rods_home, 'galaxy_data' )
            else:
                self.root_collection_path = path_join( rods_home, configured_path )

        # will return a collection object regardless of whether it exists
        self.root_collection = irods.irodsCollection( self.rods_conn, self.root_collection_path )

        if self.root_collection.getId() == -1:
            log.warning( "iRODS root collection does not exist, will attempt to create: %s", self.root_collection_path )
            # Move to the parent collection and create the last path component there.
            self.root_collection.upCollection()
            status = self.root_collection.createCollection( os.path.basename( self.root_collection_path ) )
            if status != 0:
                raise AssertionError( "iRODS root collection creation failed: %s" % self.root_collection_path )
            # Re-open the collection to verify that it really exists now.
            self.root_collection = irods.irodsCollection( self.rods_conn, self.root_collection_path )
            if self.root_collection.getId() == -1:
                raise AssertionError( "iRODS root collection creation claimed success but still does not exist" )

        if self.default_resource is None:
            self.default_resource = self.rods_env.rodsDefResource

        log.info( "iRODS data for this instance will be stored in collection: %s, resource: %s", self.root_collection_path, self.default_resource )
Beispiel #4
0
    def __get_rods_path(
        self,
        obj,
        base_dir=None,
        dir_only=False,
        extra_dir=None,
        extra_dir_at_root=False,
        alt_name=None,
        strip_dat=True,
        **kwargs
    ):
        """
        Build the iRODS path for ``obj`` below ``self.root_collection_path``.

        The result is ``<root>/<extra_dir>/<name>`` where ``<name>`` is
        ``alt_name`` if given, otherwise ``dataset_<obj.id>``.  With
        ``dir_only`` the name component is left out.  With ``strip_dat``
        (the default) a trailing ``.dat`` is removed from ``alt_name`` and
        not appended to the default name.

        ``base_dir``, ``extra_dir_at_root`` and any additional keyword
        arguments are accepted but not used here.
        """
        # extra_dir_at_root is ignored - the iRODS plugin does not use the
        # directory hash, so there is only one level of subdirectory.
        rel_path = "" if extra_dir is None else extra_dir

        if not dir_only:
            # the .dat extension is stripped when stored in iRODS
            if strip_dat and alt_name and alt_name.endswith(".dat"):
                alt_name = os.path.splitext(alt_name)[0]
            fallback = "dataset_%s" % obj.id
            if not strip_dat:
                fallback += ".dat"
            rel_path = path_join(rel_path, alt_name or fallback)

        return path_join(self.root_collection_path, rel_path)
Beispiel #5
0
    def __get_rods_path(self,
                        obj,
                        base_dir=None,
                        dir_only=False,
                        extra_dir=None,
                        extra_dir_at_root=False,
                        alt_name=None,
                        strip_dat=True,
                        **kwargs):
        """
        Build the iRODS path for ``obj`` below ``self.root_collection_path``.

        The result is ``<root>/<extra_dir>/<name>`` where ``<name>`` is
        ``alt_name`` if given, otherwise ``dataset_<obj.id>``.  With
        ``dir_only`` the name component is left out.  With ``strip_dat``
        (the default) a trailing ``.dat`` is removed from ``alt_name`` and
        not appended to the default name.

        ``base_dir``, ``extra_dir_at_root`` and any additional keyword
        arguments are accepted but not used here.
        """
        # extra_dir_at_root is ignored - the iRODS plugin does not use the
        # directory hash, so there is only one level of subdirectory.
        rel_path = "" if extra_dir is None else extra_dir

        if not dir_only:
            # the .dat extension is stripped when stored in iRODS
            if strip_dat and alt_name and alt_name.endswith('.dat'):
                alt_name = os.path.splitext(alt_name)[0]
            fallback = 'dataset_%s' % obj.id
            if not strip_dat:
                fallback += '.dat'
            rel_path = path_join(rel_path, alt_name or fallback)

        return path_join(self.root_collection_path, rel_path)
Beispiel #6
0
    def __get_rods_path( self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs ):
        """
        Build the iRODS path for ``obj`` below ``self.root_collection_path``.

        The result is ``<root>/<extra_dir>/<name>`` where ``<name>`` is
        ``alt_name`` if given, otherwise ``dataset_<obj.id>``.  With
        ``dir_only`` the name component is left out.  With ``strip_dat``
        (the default) a trailing ``.dat`` is removed from ``alt_name`` and
        not appended to the default name.

        ``base_dir``, ``extra_dir_at_root`` and any additional keyword
        arguments are accepted but not used here.

        Raises ``ObjectInvalid`` if ``extra_dir`` is not normalized or if
        ``alt_name`` is rejected by ``safe_relpath``.
        """
        # extra_dir is not expected to be built from user-provided data;
        # refuse anything that is not already in normalized form anyway.
        if extra_dir:
            if extra_dir != os.path.normpath( extra_dir ):
                log.warning( 'extra_dir is not normalized: %s', extra_dir )
                raise ObjectInvalid( "The requested object is invalid" )

        # alt_name must not point outside the directory constructed here.
        # Valid parent-directory references are normalized away because
        # iRODS will not follow them.
        if alt_name:
            if safe_relpath( alt_name ):
                alt_name = os.path.normpath( alt_name )
            else:
                log.warning( 'alt_name would locate path outside dir: %s', alt_name )
                raise ObjectInvalid( "The requested object is invalid" )

        # extra_dir_at_root is ignored - the iRODS plugin does not use the
        # directory hash, so there is only one level of subdirectory.
        rel_path = "" if extra_dir is None else extra_dir

        if not dir_only:
            # the .dat extension is stripped when stored in iRODS
            if strip_dat and alt_name and alt_name.endswith( '.dat' ):
                alt_name = os.path.splitext( alt_name )[0]
            fallback = 'dataset_%s' % obj.id
            if not strip_dat:
                fallback += '.dat'
            rel_path = path_join( rel_path, alt_name or fallback )

        return path_join( self.root_collection_path, rel_path )
Beispiel #7
0
 def default(self, project, proto, *args):
     """Serve a file of ``project``, fetching it from upstream when not stored locally.

     ``proto`` and ``args`` are the path segments of the upstream URL: the
     scheme including its colon (``http:`` or ``https:``) followed by the
     remaining segments; the last segment is the file name.

     If the file already exists below ``self._storage`` the client is
     redirected to its storage URL.  Otherwise the upstream URL is requested
     and, on success, the response is handed to ``_content`` together with the
     local target path (``_content`` is defined elsewhere; presumably it
     streams the body to the client while saving it - confirm).

     Raises ``cherrypy.HTTPError`` with BAD_REQUEST for an unsupported scheme
     or a missing file segment, with the upstream status code when upstream
     does not answer OK, and with the error text when the request itself
     fails.  Raises ``cherrypy.HTTPRedirect`` when the file is stored locally.
     """
     # Reject unsupported schemes and requests without any file segment;
     # the latter would otherwise fail with IndexError on args[-1].
     if proto not in ('http:', 'https:') or not args:
         raise cherrypy.HTTPError(requests.codes.BAD_REQUEST)
     file = args[-1]
     file_path = os.path.join(self._storage, project, file)
     if os.path.exists(file_path):
         raise cherrypy.HTTPRedirect(path_join(root_storage, project, file))

     url = f'{proto}//{urlunquote("/".join(args))}'
     try:
         r = requests.get(url, stream=True,
                          headers=self._headers,
                          proxies=self._proxies,
                          timeout=self._timeout)
     except Exception as ex:
         msg = str(ex)
         cherrypy.log(msg)
         raise cherrypy.HTTPError(message=msg)
     if r.status_code != requests.codes.OK:
         status = r.status_code
         # With stream=True the connection stays checked out until the body
         # is consumed or the response is closed.
         r.close()
         raise cherrypy.HTTPError(status)

     os.makedirs(os.path.dirname(file_path), exist_ok=True)
     resp_headers = cherrypy.response.headers
     # Upstream may omit either header (e.g. no Content-Length for chunked
     # responses); forward only what is actually present.
     for name in ('Content-Type', 'Content-Length'):
         if name in r.headers:
             resp_headers[name] = r.headers[name]
     return _content(r, file_path)
Beispiel #8
0
    def _remote(self, project, project_path, response):
        """Rewrite the links of an upstream index page for ``project``.

        When ``project_path`` does not exist, every match of ``replace_re`` in
        ``response.text`` is rewritten through ``self._replace`` and the text
        is returned.  Otherwise only the lines containing an anchor are kept:
        files that are also stored locally get a local link, the others a
        non-local one, and local files missing from the upstream page are
        appended.  The lines are sorted by ``parse_version`` of the file name
        and rendered with the ``_HTML`` template.
        """
        def to_remote(match):
            return self._replace(match, project)

        if not os.path.exists(project_path):
            return replace_re.sub(to_remote, response.text)

        def to_local(match):
            return self._replace(match, project, True)

        # Local files not yet matched against the upstream listing.
        pending = self._local_files(project_path)
        entries = []
        for row in response.text.splitlines():
            if '<a ' not in row:
                continue
            name = search_re.search(row)[1]
            if name in pending:
                rewritten = replace_re.sub(to_local, row)
                del pending[name]
            else:
                rewritten = replace_re.sub(to_remote, row)
            entries.append((name, rewritten))

        # Whatever is left exists only locally.
        for file in pending:
            url = path_join(root_storage, project, file)
            hash_data = pending[file]
            if hash_data:
                url += '#' + hash_data
            entries.append((file, f'<a href="{url}">{file}</a><br>'))

        entries.sort(key=lambda entry: parse_version(entry[0]))
        content = '\n'.join(html for _, html in entries)
        return _HTML % dict(project=project, content=content)
Beispiel #9
0
 def _replace(self, m, project, local=False):
     """Return the opening ``<a href="..."`` text replacing match ``m``.

     ``m`` is indexed by the group names ``url`` and ``hash``.  With ``local``
     the link points to the last path segment of the matched URL below
     ``root_storage``/``project``.  Otherwise the matched URL is resolved
     against ``self._index_url``, quoted and appended to
     ``packs_path``/``project``.  A non-empty hash is appended as fragment.
     """
     if not local:
         url = urljoin(self._index_url, m['url'])
         if not m['hash']:
             return f'<a href="{packs_path}/{project}/{urlquote(url)}"'
         return (f'<a href="{packs_path}/{project}/'
                 f'{urlquote(url)}#{m["hash"]}"')

     # Local file: only the file name of the matched URL is kept.
     url = path_join(root_storage, project, m['url'].rsplit('/', 1)[-1])
     if not m['hash']:
         return f'<a href="{url}"'
     return f'<a href="{url}#{m["hash"]}"'
Beispiel #10
0
 def _local(self, project, project_path):
     """Render an index page from the files found in ``project_path``.

     Files are listed in ``parse_version`` order; files ending in
     ``hash_ext`` are not listed themselves, but when ``<file><hash_ext>``
     exists its stripped content is appended to the link as fragment.

     Raises ``cherrypy.HTTPError`` with NOT_FOUND when the directory does not
     exist or yields no links.
     """
     if not os.path.exists(project_path):
         raise cherrypy.HTTPError(requests.codes.NOT_FOUND)

     links = []
     for file in sorted(os.listdir(project_path), key=parse_version):
         if not file.endswith(hash_ext):
             url = path_join(root_storage, project, file)
             hash_file = os.path.join(project_path, file + hash_ext)
             if os.path.exists(hash_file):
                 url += '#' + read_all(hash_file).strip()
             links.append(f'<a href="{url}">{file}</a><br>')

     if not links:
         raise cherrypy.HTTPError(requests.codes.NOT_FOUND)
     return _HTML % dict(project=project, content='\n'.join(links))
Beispiel #11
0
 def unzip(f):
     """Extract the only member of zip archive ``f`` and return its new path.

     The member is extracted into a temporary directory named after ``f``
     with up to two extensions removed, moved to ``<that name>.db`` and the
     directory is removed again.

     Raises ``IOError`` unless the archive lists exactly one member.
     """
     archive = mkZipFileRd(f)
     members = archive.namelist()
     if len(members) != 1:
         raise IOError('more than one item in zip file; which to use? %s' % members)  # noqa
     member = members[0]
     log.info('extracting %s from %s', member, f)
     # x.zip    -> x    -> x
     # x.db.zip -> x.db -> x
     without_zip = splitext(f)[0]
     workdir = splitext(without_zip)[0]
     target = workdir + '.db'
     archive.extract(member, workdir)
     rename(path_join(workdir, member), target)
     rmdir(workdir)
     return target
Beispiel #12
0
 def unzip(f):
     """Extract the only member of zip archive ``f`` and return its new path.

     The member is extracted into a temporary directory named after ``f``
     with up to two extensions removed, moved to ``<that name>.db`` and the
     directory is removed again.

     Raises ``IOError`` unless the archive lists exactly one member.
     """
     archive = mkZipFileRd(f)
     members = archive.namelist()
     if len(members) != 1:
         raise IOError(
             'more than one item in zip file; which to use? %s' % members)  # noqa
     member = members[0]
     log.info('extracting %s from %s', member, f)
     # x.zip    -> x    -> x
     # x.db.zip -> x.db -> x
     without_zip = splitext(f)[0]
     workdir = splitext(without_zip)[0]
     target = workdir + '.db'
     archive.extract(member, workdir)
     rename(path_join(workdir, member), target)
     rmdir(workdir)
     return target
Beispiel #13
0
 def _files(self, project):
     """List the stored files of ``project`` as ``ProjectFile`` objects.

     Returns ``None`` when the project directory below ``self._storage`` does
     not exist.  Otherwise returns one ``ProjectFile`` per directory entry in
     ``parse_version`` order, leaving out entries ending in ``hash_ext``.
     """
     proj_path = os.path.join(self._storage, project)
     if not os.path.exists(proj_path):
         return None

     def describe(entry):
         # Arguments: entry name, its joined URL path, the size formatted
         # with format_bin_prefix, and the modification time as ISO text.
         info = entry.stat()
         return ProjectFile(
             entry.name,
             path_join(path, storage, project, entry.name),
             format_bin_prefix('.1f', info.st_size),
             datetime.fromtimestamp(info.st_mtime).isoformat(' ', 'seconds'))

     with os.scandir(proj_path) as it:
         ordered = sorted(it, key=lambda e: parse_version(e.name))
         return [describe(entry) for entry in ordered
                 if not entry.name.endswith(hash_ext)]
Beispiel #14
0
# Per-URL-path configuration mapping (the keys use CherryPy's ``tools.*``
# naming).  It is mutable module state: ``Root.__init__`` adds an '/admin'
# entry when an admin password is configured.
config = {
    '/': {},
    # `storage` is defined outside this view; its entry enables the static
    # directory tool with the directory '.'.
    storage: {
        'tools.staticdir.on': True,
        'tools.staticdir.dir': '.',
    },
    # The favicon is served as a single static file.
    '/favicon.ico': {
        'tools.staticfile.on': True,
        'tools.staticfile.filename': get_favicon_path(),
    }
}
# Matches a name made of letters, digits, '.', '_' and '-' that starts and
# ends with a letter or digit (case-insensitive).
proj_nam_re = re.compile('^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$',
                         re.IGNORECASE)
# Suffix used to recognise hash files.
hash_ext = '.sha256'
# NOTE(review): not referenced in the visible code - presumably the chunk
# size in bytes used when streaming files; confirm.
chunk_size = 8192
# NOTE(review): if path_join is posixpath.join, the leading '/' of the second
# argument makes the result '/project/' regardless of `path` - confirm that
# this is intended.
project_path = path_join(path, '/project/')


class Root:
    """Root handler configured from the ``cfg`` mapping."""

    def __init__(self, cfg):
        """Store the settings from ``cfg``.

        Reads the keys ``admin-pass``, ``storage-path`` and ``project-url``.
        When ``admin-pass`` is truthy the password is kept and an '/admin'
        entry enabling sessions (timeout taken from ``admin-expire``) is
        added to the module-level ``config``; otherwise ``_password`` is not
        set at all.
        """
        admin_pass = cfg['admin-pass']
        if admin_pass:
            self._password = admin_pass
            admin_section = {
                'tools.sessions.on': True,
                'tools.sessions.name': 'admin_session_id',
                'tools.sessions.timeout': cfg['admin-expire'],
            }
            config['/admin'] = admin_section
        self._admin_enabled = bool(admin_pass)
        self._storage = cfg['storage-path']
        self._project_url = cfg['project-url']
Beispiel #15
0
 def descendant(sub):
     """Join ``sub`` onto ``path`` and return the result.

     Raises ``IOError`` when the joined path does not start with ``path``
     (e.g. because ``sub`` is absolute).
     """
     d = path_join(path, sub)
     # NOTE(review): this is a plain string-prefix test; '..' segments in
     # ``sub`` are not resolved before the comparison - confirm callers never
     # pass such values.
     if not d.startswith(path):
         # Format the message explicitly: passing the values as extra
         # arguments makes IOError treat them as errno/strerror/filename.
         raise IOError('%s not under %s' % (d, path))
     return d
Beispiel #16
0
import os
from posixpath import join as path_join
from urllib.parse import unquote as urlunquote

import cherrypy
import requests

from . import root, PACKS_PATH

# Path of this module, taken from the package-level PACKS_PATH constant.
path = PACKS_PATH
# Per-URL-path configuration mapping; only an empty root entry here.
config = {'/': {}}
# root.path joined with root.storage; used in this file to build the links
# and redirects that point to locally stored files.
root_storage = path_join(root.path, root.storage)
# NOTE(review): not referenced in the visible code - presumably the chunk
# size in bytes used when streaming downloads; confirm.
chunk_size = 8192


class Packs:
    def __init__(self, cfg):
        self._storage = cfg['storage-path']
        self._proxies = cfg['proxies']
        self._timeout = cfg['timeout']
        self._headers = {'User-Agent': cfg['user-agent']}

    @cherrypy.expose
    def default(self, project, proto, *args):
        if proto not in ('http:', 'https:') and args:
            raise cherrypy.HTTPError(requests.codes.BAD_REQUEST)
        file = args[-1]
        file_path = os.path.join(self._storage, project, file)
        if not os.path.exists(file_path):
            url = f'{proto}//{urlunquote("/".join(args))}'
            try:
 def descendant(sub):
     """Join ``sub`` onto ``path`` and return the result.

     Raises ``IOError`` when the joined path does not start with ``path``
     (e.g. because ``sub`` is absolute).
     """
     d = path_join(path, sub)
     # NOTE(review): this is a plain string-prefix test; '..' segments in
     # ``sub`` are not resolved before the comparison - confirm callers never
     # pass such values.
     if not d.startswith(path):
         # Format the message explicitly: passing the values as extra
         # arguments makes IOError treat them as errno/strerror/filename.
         raise IOError('%s not under %s' % (d, path))
     return d