def validate_server_directory_upload(trans, server_dir):
    """
    Resolve and vet a server-side directory requested for a library import.

    Admins import relative to ``library_import_dir``; everybody else imports
    relative to ``user_library_import_dir`` (with the user's email appended
    as a sub-directory unless ``server_dir`` already equals that email).

    :param trans: transaction object; ``user_is_admin``, ``user`` and
                  ``app.config`` are read from it.
    :param server_dir: directory name/path relative to the import dir.
    :returns: tuple ``(full_dir, import_dir_desc)`` where ``import_dir_desc``
              is the name of the config option the import dir came from.
    :raises RequestParameterInvalidException: ``server_dir`` is unspecified,
              fails ``safe_relpath``, or ``unsafe_walk`` reported a path.
    :raises ConfigDoesNotAllowException: the relevant import dir option is
              not set.
    """
    if server_dir in [None, 'None', '']:
        raise RequestParameterInvalidException("Invalid or unspecified server_dir parameter")

    is_admin = trans.user_is_admin
    config = trans.app.config

    if is_admin:
        import_dir_desc = 'library_import_dir'
        import_dir = config.library_import_dir
        if not import_dir:
            raise ConfigDoesNotAllowException('"library_import_dir" is not set in the Galaxy configuration')
    else:
        import_dir_desc = 'user_library_import_dir'
        import_dir = config.user_library_import_dir
        if not import_dir:
            raise ConfigDoesNotAllowException('"user_library_import_dir" is not set in the Galaxy configuration')
        user_email = trans.user.email
        if server_dir != user_email:
            # Each user gets a sub-directory named after their email.
            import_dir = os.path.join(import_dir, user_email)

    full_dir = os.path.join(import_dir, server_dir)

    # Reject outright anything safe_relpath does not accept.
    if not safe_relpath(server_dir):
        log.error(
            'User attempted to import a directory path that resolves to a path outside of their import dir: %s -> %s',
            server_dir,
            os.path.realpath(full_dir),
        )
        raise RequestParameterInvalidException("Invalid server_dir specified")

    # username is only handed to unsafe_walk when permission checking is on.
    username = trans.user.username if config.user_library_import_check_permissions else None

    offending_path = None
    if import_dir_desc == 'user_library_import_dir':
        symlink_whitelist = config.user_library_import_symlink_whitelist
        if safe_contains(import_dir, full_dir, whitelist=symlink_whitelist):
            # Log every path reported by unsafe_walk; the last one (if any)
            # remains bound and triggers the rejection below.
            for offending_path in unsafe_walk(
                full_dir,
                whitelist=[import_dir] + symlink_whitelist,
                username=username,
            ):
                log.error(
                    'User attempted to import a path that resolves to a path outside of their import dir: %s -> %s',
                    offending_path,
                    os.path.realpath(offending_path),
                )

    if offending_path:
        raise RequestParameterInvalidException("Invalid server_dir specified")

    return full_dir, import_dir_desc
def validate_path_upload(trans):
    """
    Check that the current request may upload via filesystem paths.

    :param trans: transaction object; ``app.config.allow_library_path_paste``
                  and ``user_is_admin`` are read from it.
    :returns: ``None`` when both checks pass.
    :raises ConfigDoesNotAllowException: path paste is disabled in the config.
    :raises AdminRequiredException: the current user is not an administrator.
    """
    path_paste_enabled = trans.app.config.allow_library_path_paste
    if not path_paste_enabled:
        raise ConfigDoesNotAllowException(
            '"allow_path_paste" is not set to True in the Galaxy configuration file'
        )

    if trans.user_is_admin:
        return

    raise AdminRequiredException(
        'Uploading files via filesystem paths can only be performed by administrators'
    )
def _search(self, trans, q, page=1, page_size=10):
    """
    Run a repository search against the Tool Shed's Whoosh index.

    The index is not built on the fly: it has to be created beforehand with
    scripts/tool_shed/build_ts_whoosh_index.sh. In addition the config option
    toolshed_search_on must be True and whoosh_index_dir must be set.

    :param trans: transaction object, passed through to the searcher.
    :param q: raw search term; surrounding whitespace is stripped.
    :param page: page number, passed through to the searcher.
    :param page_size: page size, passed through to the searcher.
    :returns: the searcher's result mapping with a 'hostname' entry added.
    :raises ConfigDoesNotAllowException: search is off or no index dir is set.
    :raises RequestParameterInvalidException: stripped term is under 3 chars.
    """
    config = self.app.config

    if not config.toolshed_search_on:
        raise ConfigDoesNotAllowException(
            'Searching the TS through the API is turned off for this instance.'
        )
    if not config.whoosh_index_dir:
        raise ConfigDoesNotAllowException(
            'There is no directory for the search index specified. Please contact the administrator.'
        )

    term = q.strip()
    if len(term) < 3:
        raise RequestParameterInvalidException(
            'The search term has to be at least 3 characters long.'
        )

    searcher = RepoSearch()

    # (config option, fallback value) pairs; the order defines the field
    # order of the Boosts tuple handed to the searcher.
    boost_defaults = (
        ('repo_name_boost', 0.9),
        ('repo_description_boost', 0.6),
        ('repo_long_description_boost', 0.5),
        ('repo_homepage_url_boost', 0.3),
        ('repo_remote_repository_url_boost', 0.2),
        ('repo_owner_username_boost', 0.3),
    )
    Boosts = namedtuple('Boosts', [option for option, _ in boost_defaults])
    boosts = Boosts(*[float(config.get(option, fallback)) for option, fallback in boost_defaults])

    hits = searcher.search(trans, term, page, page_size, boosts)
    hits['hostname'] = web.url_for('/', qualified=True)
    return hits
def get_url_paste_urls_or_filename(group_incoming, override_name=None, override_info=None):
    """
    Yield one upload descriptor per pasted URL, or one for the paste itself.

    The file named by ``group_incoming['url_paste']`` is read. If its content
    starts with a recognised URL scheme, every non-empty line that also looks
    like a URL yields ``Bunch(type='url', ...)``; other lines are skipped.
    Otherwise the paste is treated as literal content and a single
    ``Bunch(type='file', ...)`` pointing at the paste file is yielded.

    NOTE: ``trans`` is not a parameter; it is looked up from the surrounding
    scope when a ``file://`` line is encountered.

    :param group_incoming: mapping that may contain a 'url_paste' file path.
    :param override_name: if truthy, used as the name of every yielded item.
    :param override_info: accepted for interface compatibility; unused here.
    :raises AdminRequiredException: a ``file://`` line from a non-admin.
    :raises ConfigDoesNotAllowException: a ``file://`` line while
            ``allow_path_paste`` is off.
    """
    url_paste_file = group_incoming.get('url_paste', None)
    if url_paste_file is None:
        return

    # Use a context manager so the handle is closed deterministically rather
    # than whenever the garbage collector gets to it.
    with open(url_paste_file, 'r') as paste_handle:
        url_paste = paste_handle.read()

    url_prefixes = ("http://", "https://", "ftp://", "file://")

    def start_of_url(content):
        # Only the first 8 characters matter (len("https://") == 8); the
        # comparison is case-insensitive.
        return content.lstrip()[0:8].lower().startswith(url_prefixes)

    if start_of_url(url_paste):
        for line in url_paste.replace('\r', '').split('\n'):
            line = line.strip()
            if not line or not start_of_url(line):
                continue  # blank or non-url line, ignore
            # start_of_url() matches schemes case-insensitively, so the
            # file:// gate must too -- otherwise "FILE://..." would be
            # accepted as a URL while skipping the admin/config checks.
            if "file://" in line.lower():
                if not trans.user_is_admin:
                    raise AdminRequiredException()
                elif not trans.app.config.allow_path_paste:
                    raise ConfigDoesNotAllowException()
                upload_path = line[len("file://"):]
                dataset_name = os.path.basename(upload_path)
            else:
                dataset_name = line
            if override_name:
                dataset_name = override_name
            yield Bunch(type='url', path=line, name=dataset_name)
    else:
        dataset_name = 'Pasted Entry'  # we need to differentiate between various url pastes here
        if override_name:
            dataset_name = override_name
        yield Bunch(type='file', path=url_paste_file, name=dataset_name)
def validate_url(url, ip_allowlist):
    """
    Refuse http(s) URLs that resolve to private addresses not in the allowlist.

    Anything that does not start (after leading whitespace) with an http or
    https scheme is returned untouched. For http(s) URLs the host is resolved
    with ``socket.getaddrinfo`` and *every* resulting address is checked.

    :param url: candidate URL (or arbitrary text).
    :param ip_allowlist: iterable of allowed entries. Entries exposing a
                         ``subnets`` attribute are treated as networks
                         (membership test); all others are compared for
                         equality with the resolved address.
    :returns: ``url`` unchanged for non-http(s) input, otherwise ``url`` with
              leading whitespace stripped.
    :raises ConfigDoesNotAllowException: a resolved address is private and
            matches no allowlist entry.
    :raises ValueError: the port component is not an integer (deliberately
            not caught so that validation fails closed).
    """
    stripped = url.lstrip()
    # If it doesn't look like a URL, ignore it. URL schemes are
    # case-insensitive, so "HTTP://..." must be validated as well instead of
    # slipping through unchecked.
    if not stripped[:8].lower().startswith(('http://', 'https://')):
        return url

    # Leading whitespace is dropped before handing the url to urlparse().
    url = stripped

    # Extract hostname component
    parsed_url = urlparse(url).netloc

    # If credentials are in this URL, we need to strip those.
    if parsed_url.count('@') > 0:
        parsed_url = parsed_url[parsed_url.rindex('@') + 1:]

    # Percent encoded colons and other characters will not be resolved as
    # such so we don't have to either.

    # The netloc may carry a port, which has to be split off:
    #   * exactly one colon      -> host:port (cannot be IPv6, which has
    #                               at least two colons).
    #   * two or more colons     -> IPv6. A port is only present when the
    #                               address is bracketed and something follows
    #                               the closing bracket, e.g.
    #                               [2001:db8:1f70::999:de8:7648:6e8]:100
    port = None
    colon_count = parsed_url.count(':')
    has_port = colon_count == 1 or (
        colon_count >= 2 and ']' in parsed_url and not parsed_url.endswith(']')
    )
    if has_port:
        idx = parsed_url.rindex(':')
        # Parse as int and let this fail ungracefully if parsing fails: we
        # want to fail closed rather than open.
        port = int(parsed_url[idx + 1:])
        parsed_url = parsed_url[:idx]

    # safe to log out, no credentials/request path, just an IP + port
    log.debug("parsed url, port: %s : %s", parsed_url, port)

    # Resolve the hostname. Only the address itself (info[4][0]) is kept;
    # family, socket type and protocol are irrelevant here.
    addrinfo = socket.getaddrinfo(parsed_url, port)
    addrinfo_results = {info[4][0] for info in addrinfo}

    # There may be multiple addresses (e.g. IPv4 + IPv6 or DNS round robin).
    # Any one of them could be local and could be the one actually used, so
    # all of them must pass.
    for raw_ip in addrinfo_results:
        ip = ipaddress.ip_address(unicodify(raw_ip))
        if not ip.is_private:
            continue
        is_allowlisted = any(
            (ip in allowed) if hasattr(allowed, 'subnets') else (ip == allowed)
            for allowed in ip_allowlist
        )
        if not is_allowlisted:
            raise ConfigDoesNotAllowException(
                "Access to this address in not permitted by server configuration"
            )
    return url