Example #1
0
def validate_server_directory_upload(trans, server_dir):
    if server_dir in [None, 'None', '']:
        raise RequestParameterInvalidException("Invalid or unspecified server_dir parameter")

    if trans.user_is_admin:
        import_dir = trans.app.config.library_import_dir
        import_dir_desc = 'library_import_dir'
        if not import_dir:
            raise ConfigDoesNotAllowException('"library_import_dir" is not set in the Galaxy configuration')
    else:
        import_dir = trans.app.config.user_library_import_dir
        if not import_dir:
            raise ConfigDoesNotAllowException('"user_library_import_dir" is not set in the Galaxy configuration')
        if server_dir != trans.user.email:
            import_dir = os.path.join(import_dir, trans.user.email)
        import_dir_desc = 'user_library_import_dir'

    full_dir = os.path.join(import_dir, server_dir)
    unsafe = None
    if safe_relpath(server_dir):
        username = trans.user.username if trans.app.config.user_library_import_check_permissions else None
        if import_dir_desc == 'user_library_import_dir' and safe_contains(import_dir, full_dir, whitelist=trans.app.config.user_library_import_symlink_whitelist):
            for unsafe in unsafe_walk(full_dir, whitelist=[import_dir] + trans.app.config.user_library_import_symlink_whitelist, username=username):
                log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', unsafe, os.path.realpath(unsafe))
    else:
        log.error('User attempted to import a directory path that resolves to a path outside of their import dir: %s -> %s', server_dir, os.path.realpath(full_dir))
        unsafe = True
    if unsafe:
        raise RequestParameterInvalidException("Invalid server_dir specified")

    return full_dir, import_dir_desc
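
# A minimal stdlib-only sketch of the core containment check the function above
# performs via safe_relpath/safe_contains/unsafe_walk (this helper is assumed,
# not the Galaxy implementation): resolve symlinks with realpath and verify the
# result still lives under the configured import directory.
import os

def is_contained(import_dir, server_dir):
    """Return True if import_dir/server_dir resolves to a path inside import_dir."""
    base = os.path.realpath(import_dir)
    target = os.path.realpath(os.path.join(base, server_dir))
    return os.path.commonpath([base, target]) == base

# A '..' component escapes the base directory, so it is rejected.
assert not is_contained('/tmp', '../etc')
assert is_contained('/tmp', 'my_upload')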
Example #2
0
def validate_path_upload(trans):
    if not trans.app.config.allow_library_path_paste:
        raise ConfigDoesNotAllowException(
            '"allow_path_paste" is not set to True in the Galaxy configuration file'
        )

    if not trans.user_is_admin:
        raise AdminRequiredException(
            'Uploading files via filesystem paths can only be performed by administrators'
        )
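
# A self-contained sketch of the same guard order with plain stand-ins
# (hypothetical names, not Galaxy classes): the instance-wide config flag is
# checked before the per-user admin check, so a disabled feature fails the
# same way for every caller.
class ConfigDoesNotAllowError(Exception):
    pass

class AdminRequiredError(Exception):
    pass

def check_path_upload(allow_path_paste, user_is_admin):
    if not allow_path_paste:
        raise ConfigDoesNotAllowError("path paste is disabled in the configuration")
    if not user_is_admin:
        raise AdminRequiredError("only administrators may upload via filesystem paths")

check_path_upload(allow_path_paste=True, user_is_admin=True)  # passes silently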
Example #3
0
    def _search(self, trans, q, page=1, page_size=10):
        """
        Perform the search over TS repositories.
        Note that the search runs over a Whoosh index which has to be
        pre-created manually with scripts/tool_shed/build_ts_whoosh_index.sh.
        The TS config option toolshed_search_on also has to be True and
        whoosh_index_dir has to be specified.
        """
        conf = self.app.config
        if not conf.toolshed_search_on:
            raise ConfigDoesNotAllowException(
                'Searching the TS through the API is turned off for this instance.'
            )
        if not conf.whoosh_index_dir:
            raise ConfigDoesNotAllowException(
                'There is no directory for the search index specified. Please contact the administrator.'
            )
        search_term = q.strip()
        if len(search_term) < 3:
            raise RequestParameterInvalidException(
                'The search term has to be at least 3 characters long.')

        repo_search = RepoSearch()

        Boosts = namedtuple('Boosts', [
            'repo_name_boost', 'repo_description_boost',
            'repo_long_description_boost', 'repo_homepage_url_boost',
            'repo_remote_repository_url_boost', 'repo_owner_username_boost'
        ])
        boosts = Boosts(
            float(conf.get('repo_name_boost', 0.9)),
            float(conf.get('repo_description_boost', 0.6)),
            float(conf.get('repo_long_description_boost', 0.5)),
            float(conf.get('repo_homepage_url_boost', 0.3)),
            float(conf.get('repo_remote_repository_url_boost', 0.2)),
            float(conf.get('repo_owner_username_boost', 0.3)))

        results = repo_search.search(trans, search_term, page, page_size,
                                     boosts)
        results['hostname'] = web.url_for('/', qualified=True)
        return results
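
# A standalone sketch of the boost-loading pattern used above (hypothetical
# helper, stdlib only): the weights are collected into a namedtuple, falling
# back to the defaults shown in the code when the config mapping omits them.
from collections import namedtuple

Boosts = namedtuple('Boosts', ['repo_name_boost', 'repo_description_boost'])

def load_boosts(conf):
    """conf is any mapping with a dict-style get(); defaults mirror the code above."""
    return Boosts(
        float(conf.get('repo_name_boost', 0.9)),
        float(conf.get('repo_description_boost', 0.6)),
    )

print(load_boosts({'repo_name_boost': '1.2'}))
# Boosts(repo_name_boost=1.2, repo_description_boost=0.6)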
Example #4
0
        def get_url_paste_urls_or_filename(group_incoming,
                                           override_name=None,
                                           override_info=None):
            url_paste_file = group_incoming.get('url_paste', None)
            if url_paste_file is not None:
                with open(url_paste_file, 'r') as url_paste_fh:
                    url_paste = url_paste_fh.read()

                def start_of_url(content):
                    start_of_url_paste = content.lstrip()[0:8].lower()
                    looks_like_url = False
                    for url_prefix in [
                            "http://", "https://", "ftp://", "file://"
                    ]:
                        if start_of_url_paste.startswith(url_prefix):
                            looks_like_url = True
                            break

                    return looks_like_url

                if start_of_url(url_paste):
                    url_paste = url_paste.replace('\r', '').split('\n')
                    for line in url_paste:
                        line = line.strip()
                        if line:
                            if not start_of_url(line):
                                continue  # non-url line, ignore

                            if "file://" in line:
                                if not trans.user_is_admin:
                                    raise AdminRequiredException()
                                elif not trans.app.config.allow_path_paste:
                                    raise ConfigDoesNotAllowException()
                                upload_path = line[len("file://"):]
                                dataset_name = os.path.basename(upload_path)
                            else:
                                dataset_name = line

                            if override_name:
                                dataset_name = override_name
                            yield Bunch(type='url',
                                        path=line,
                                        name=dataset_name)
                else:
                    dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
                    if override_name:
                        dataset_name = override_name
                    yield Bunch(type='file',
                                path=url_paste_file,
                                name=dataset_name)
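
# A self-contained sketch (hypothetical helper, not Galaxy code) of the prefix
# heuristic the generator above applies twice: once to the whole paste to decide
# between URL mode and plain-file mode, then again to each line to skip anything
# that is not a URL.
def looks_like_url(content):
    start = content.lstrip()[0:8].lower()
    return any(start.startswith(prefix)
               for prefix in ("http://", "https://", "ftp://", "file://"))

pasted = "https://example.org/data.tsv\njust some pasted text\nftp://example.org/file"
if looks_like_url(pasted):
    urls = [line.strip() for line in pasted.splitlines() if looks_like_url(line)]
    print(urls)  # ['https://example.org/data.tsv', 'ftp://example.org/file']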
Example #5
0
def validate_url(url, ip_allowlist):
    # If it doesn't look like a URL, ignore it.
    if not (url.lstrip().startswith('http://')
            or url.lstrip().startswith('https://')):
        return url

    # Strip leading whitespace before passing url to urlparse()
    url = url.lstrip()
    # Extract hostname component
    parsed_url = urlparse(url).netloc
    # If credentials are in this URL, we need to strip those.
    if parsed_url.count('@') > 0:
        # credentials.
        parsed_url = parsed_url[parsed_url.rindex('@') + 1:]
    # Percent-encoded colons and other characters will not be decoded by
    # urlparse, so we don't have to handle them either.

    # Sometimes the netloc will contain the port, which is not desired, so we
    # need to extract it.
    port = None
    # However, it could ALSO be an IPv6 address they've supplied.
    if ':' in parsed_url:
        # IPv6 addresses already contain colons (always at least two).
        if parsed_url.count(':') >= 2:
            # Since IPv6 addresses already use colons extensively, the address is
            # wrapped in brackets when there is a port, e.g. http://[2001:db8:1f70::999:de8:7648:6e8]:100/
            # However, if the netloc ends with ']' then there is no port after it
            # and the brackets were added just for fun.
            if ']' in parsed_url and not parsed_url.endswith(']'):
                # If this +1 throws a range error, we don't care, their url
                # shouldn't end with a colon.
                idx = parsed_url.rindex(':')
                # We parse as an int and let this fail ungracefully if parsing
                # fails because we desire to fail closed rather than open.
                port = int(parsed_url[idx + 1:])
                parsed_url = parsed_url[:idx]
            else:
                # Plain ipv6 without port
                pass
        else:
            # This should finally be ipv4 with port. It cannot be IPv6 as that
            # was caught by earlier cases, and it cannot be due to credentials.
            idx = parsed_url.rindex(':')
            port = int(parsed_url[idx + 1:])
            parsed_url = parsed_url[:idx]

    # Safe to log: no credentials or request path, just a host + port.
    log.debug("parsed url, port: %s : %s", parsed_url, port)
    # Call getaddrinfo to resolve hostname into tuples containing IPs.
    addrinfo = socket.getaddrinfo(parsed_url, port)
    # Get the IP addresses that this entry resolves to (uniquely)
    # We drop:
    #   AF_* family: it will resolve to AF_INET or AF_INET6; getaddrinfo(3) doesn't even mention AF_UNIX,
    #   socktype: we don't care whether it is a stream/dgram/raw socket,
    #   protocol: we don't care whether it is TCP or UDP.
    addrinfo_results = {info[4][0] for info in addrinfo}
    # There may be multiple results (e.g. IPv4 + IPv6 or DNS round robin). Any one
    # of these could be a local address (and could be returned by chance),
    # therefore we must check them all.
    for raw_ip in addrinfo_results:
        # Convert to an IP object so we can tell if it is in private space.
        ip = ipaddress.ip_address(unicodify(raw_ip))
        # If this is a private address
        if ip.is_private:
            results = []
            # If this IP is not anywhere in the allowlist
            for allowlisted in ip_allowlist:
                # If it's an IP address range (rather than a single one...)
                if hasattr(allowlisted, 'subnets'):
                    results.append(ip in allowlisted)
                else:
                    results.append(ip == allowlisted)

            if any(results):
                # If we had any True, then THIS (and ONLY THIS) IP address that
                # the DNS entry resolved to is allowlisted and safe to access.
                # But we cannot exit here; we must ensure that all IPs the DNS
                # entry resolves to are likewise safe.
                pass
            else:
                # Otherwise, we deny access.
                raise ConfigDoesNotAllowException(
                    "Access to this address in not permitted by server configuration"
                )
    return url
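
# A minimal sketch of building an ip_allowlist that validate_url() above can
# consume (assumed construction, stdlib only): ip_network entries expose
# .subnets and are matched by containment, while ip_address entries are
# matched by equality.
import ipaddress

def build_ip_allowlist(entries):
    """Parse allowlist strings into ip_network/ip_address objects."""
    allowlist = []
    for entry in entries:
        if '/' in entry:
            allowlist.append(ipaddress.ip_network(entry))
        else:
            allowlist.append(ipaddress.ip_address(entry))
    return allowlist

# Permit one private host plus one private subnet; any other private address
# would be rejected by validate_url().
example_allowlist = build_ip_allowlist(['10.0.0.5', '192.168.1.0/24'])
print(example_allowlist)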