def urlsplit(url: str, scheme: str = '', allow_fragments: bool = True) -> urllib_parse.SplitResult:
    """Fault-tolerant wrapper around :func:`urllib.parse.urlsplit`.

    Args:
        url: URL to be split.
        scheme: default URL scheme.
        allow_fragments: whether fragments are recognised.

    Returns:
        The split result. If :func:`urllib.parse.urlsplit` raises a
        :exc:`ValueError`, a ``SplitResult`` carrying the whole *url* as its
        path (with empty netloc, query and fragment) is returned instead.
    """
    try:
        return urllib_parse.urlsplit(url, scheme, allow_fragments=allow_fragments)
    except ValueError:
        # Fall back to treating the entire input as a bare path.
        return urllib_parse.SplitResult(scheme=scheme, netloc='', path=url,
                                        query='', fragment='')
def _decode_url(self, url, path): # Make sure urlsplit() doesn't choke on scheme-less URLs, like 'localhost:9200' if '//' not in url: url = '//' + url url = urlparse.urlsplit(url) if not url.netloc: raise ValueError('Could not parse the given URL.') # If the scheme isn't explicitly provided by now, try to deduce it # from the port number scheme = url.scheme if not scheme: if 9500 <= url.port <= 9600: scheme = 'thrift' else: scheme = 'http' # Use path if provided if not path: path = url.path # Set default ports netloc = url.netloc if not url.port: if url.scheme == 'http': netloc = "{0}:{1}".format(netloc, 9200) elif url.scheme == 'https': netloc = "{0}:{1}".format(netloc, 443) elif url.scheme == 'thrift': netloc = "{0}:{1}".format(netloc, 9500) # Return new url. return urlparse.SplitResult(scheme=scheme, netloc=netloc, path=path, query='', fragment='')
def _get_encoded_url(self) -> str: """Convert any UTF-8 char in :obj:`File.file_path` into a url encoded ASCII string.""" sres = urllib_parse.urlsplit(self.file_path) return urllib_parse.urlunsplit( urllib_parse.SplitResult(sres.scheme, sres.netloc, urllib_parse.quote(sres.path), sres.query, sres.fragment))
def normalize(self, url):
    """Normalize a URL.

    Returns (domain, normpath), where both the domain and the path are
    normalized strings. The path includes a query string if one was
    provided in the source URL. Any URL fragment is discarded.

    Raises:
        ValueError: if the URL's host is not a known domain alias.
        (Previously a bare ``Exception``; ``ValueError`` is more specific
        and still caught by existing ``except Exception`` handlers.)
    """
    info = parse.urlsplit(url)
    dname = self._domain_aliases.get(info.netloc)
    if dname is None:
        raise ValueError(f'illegal domain name {info.netloc!r} for URL {url!r}')

    # The main WWT web server, being IIS, is case-insensitive in its URL
    # paths. We define the downcased path as the normal form. We do not
    # currently normalize the query parts of the URL, which *might* be
    # case-insensitive depending on how a given API is implemented.
    normpath = info.path
    domain = self._get_domain(dname)
    if not domain.has_case_sensitive_paths():
        normpath = normpath.lower()

    # Note that we discard the fragment (for now?).
    normpath = parse.SplitResult('', '', normpath, info.query, '')
    normpath = normpath.geturl()
    normpath = url_normalize(normpath)

    return (dname, normpath)
def extract_path(url):
    '''Extract the path (with query parameters) from the given url; if url
    is already a path (starts with /) it is returned without modification.
    If url is empty or only contains a domain without a trailing slash,
    a single slash is returned.'''
    # This method was actually contributed by Wiktor Bachnik (wbachnik)
    # but because I forgot to rebase before my branch, this is going to
    # appear in a different commit :-/
    if not url:
        # empty, assume /
        return '/'
    if url[0] == '/':
        # url is already a path
        return url
    # url is a proper url scheme://host/...
    parts = urlparse.urlsplit(url)
    stripped = urlparse.SplitResult(scheme='', netloc='', path=parts.path,
                                    query=parts.query, fragment='')
    result = stripped.geturl()
    if not result:
        # case for http://example.com
        result = '/'
    # If there was a question mark in the url, but no query string
    # then we must still preserve the question mark.
    if (not parts.query) and ('?' in url):
        result = result + '?'
    return result
def path2url(path): sr = parse.SplitResult(scheme='file', netloc='', path=path.replace("\\", "/"), query='', fragment='') sr = parse.urlunsplit(sr) return sr
def make_url(scheme: typing.Optional[str] = None, netloc: typing.Optional[str] = None, path: typing.Optional[str] = None) -> str: return parse.SplitResult(scheme=(scheme or 'http').lower(), netloc=netloc or '', path=path or '', query='', fragment='').geturl()
def test_svn_plus_http(self): self.assertEquals( urlparse.SplitResult(scheme='svn+http', netloc='*****@*****.**', path='/svn/trunk', query='', fragment=''), urlparse.urlsplit('svn+http://[email protected]/svn/trunk'))
def _scrub_action_url(action):
    """Remove trust ID from a URL."""
    split = netutils.urlsplit(action)
    if not Alarm._is_trust_url(split):
        return split.geturl()
    # Drop the 'trust_id[:password]@' userinfo part, keeping only the host.
    host = split.netloc.rsplit('@', 1)[-1]
    scrubbed = urlparse.SplitResult(split.scheme, host, split.path,
                                    split.query, split.fragment)
    return scrubbed.geturl()
def test_parse_endpoint(self):
    """parse_endpoint must return the split form of the endpoint URL."""
    endpoint = 'http://example.com:9292'
    client = http.HTTPClient(endpoint, token=u'adc123')
    expected = parse.SplitResult(scheme='http', netloc='example.com:9292',
                                 path='', query='', fragment='')
    self.assertEqual(expected, client.parse_endpoint(endpoint))
def canonical_url(self, url):
    """Return the canonical form of *url*, depending on its host.

    archive.ph links are resolved via follow_archive_link; TikTok links
    get their query string stripped (fragment kept); anything else is
    returned unchanged.
    """
    parts = parse.urlsplit(url)
    if parts.hostname == "archive.ph":
        return self.follow_archive_link(url)
    if parts.hostname == "www.tiktok.com":
        return parse.SplitResult(parts.scheme, parts.netloc, parts.path,
                                 '', parts.fragment).geturl()
    return url
def wp_mock(url: parse.SplitResult, request: requests.PreparedRequest):
    """Mock WordPress handler: verify an authenticated PUT of text
    annotations to the mt-wp-photo-analysis endpoint and reply 200 OK.

    Note: *url* is compared against a ``SplitResult``, so the mocking
    library presumably passes the already-split URL, not a plain string
    (the previous ``url: str`` annotation was wrong) — TODO confirm
    against the mock framework in use.
    """
    assert url == parse.SplitResult(scheme='https', netloc='example.org', path='/wp-json/mt-wp-photo-analysis/v1/text/3001', query='', fragment='')
    assert request.method == 'PUT'
    assert request.headers['Authorization'] == 'Bearer asd561'
    assert request.headers['Content-Type'] == 'application/json'
    assert request.body == b'{"textAnnotations": "some Text Message (e)"}'
    # Minimal successful response as expected by the mock framework.
    return {
        'status_code': 200,
        'content': 'OK'
    }
def sanitize_url(url, mask_url_query=True, mask_url_path=False):
    """Return *url* with sensitive parts masked.

    Query parameter values are replaced with '?' when *mask_url_query*
    is true; the whole path becomes '/??/' when *mask_url_path* is true.
    The hostname is lower-cased and any userinfo is dropped as a side
    effect of using ``hostname`` instead of ``netloc``.
    """
    parts = parse.urlsplit(url)
    # masking - may be give some hints in masking query and path instead of '?' ??
    host = parts.hostname
    if parts.port:
        host = '{}:{}'.format(parts.hostname, parts.port)
    if mask_url_query:
        query = str(parse.urlencode({key: '?' for key in parse.parse_qs(parts.query)}))
    else:
        query = parts.query
    if parts.path and mask_url_path:
        path = '/??/'
    else:
        path = parts.path
    return parse.urlunsplit(parse.SplitResult(parts.scheme, host, path, query, parts.fragment))
def merge_url_params(redir_url):
    """
    Merge the URL params of our incoming URL with the ones in our saved URL.
    If there is a conflict, the ones in our saved URL always win. The
    fragment of the saved URL is discarded.
    """
    parsed = parse.urlsplit(redir_url)
    saved_params = {key: ' '.join(values)
                    for key, values in parse.parse_qs(parsed.query).items()}
    # Saved-URL params override the incoming request's params on conflict.
    merged = {**request.args, **saved_params}
    return parse.SplitResult(parsed.scheme, parsed.netloc, parsed.path,
                             parse.urlencode(merged), '').geturl()
def expand_uri(uri, subpath):
    """Expand a 'gh' (GitHub) or 'gl' (GitLab) shorthand URI into a full
    https archive-download URL, returning (uri, subpath).

    For GitHub the extra path segments are appended to *subpath*; for
    GitLab they are encoded into the ``path`` query parameter instead.
    """
    if uri.scheme == "gh":
        # e.g. https://github.com/fholmer/make/archive/master.zip
        segments = uri.path.split("/")
        master = "{0[1]}-master".format(segments)
        if not subpath:
            subpath = master
        if len(segments) > 2:
            subpath = "/".join([subpath] + segments[2:])
        uri = parse.SplitResult("https", "github.com",
                                "{}/archive/master.zip".format(uri.path), "", "")
    elif uri.scheme == "gl":
        # https://gitlab.com/fholmer/make/-/archive/master/make-master.zip
        # https://gitlab.com/fholmer/make/-/archive/master/make-master.zip?path=tests%2Fmake%2Fmake_project
        # make-master-tests-make-make_project
        segments = uri.path.split("/")
        master = "{0[1]}-master".format(segments)
        if subpath:
            qs = parse.urlencode({"path": subpath})
        elif len(segments) > 2:
            qs = parse.urlencode({"path": "/".join(segments[2:])})
            root = "-".join([master] + segments[2:])
            subpath = "/".join([root] + segments[2:])
        else:
            qs = ""
            subpath = master
        uri = parse.SplitResult(
            "https",
            "gitlab.com",
            "/{0[0]}/{0[1]}/-/archive/master/{1}.zip".format(segments, master),
            qs,
            "",
        )
    return uri, subpath
def url(self, value: parse.SplitResult):
    """Store *value* as the location URL, normalising host-less (local)
    paths to an absolute directory path with a trailing separator.

    Fixes: the rebuilt SplitResult previously hard-coded ``fragment=None``
    (not even a str), silently discarding any fragment on the incoming
    value; the fragment is now preserved.
    """
    final_path = value.path
    if not value.hostname:
        # Local path: make it absolute and ensure a trailing separator.
        final_path = os.path.abspath(final_path)
        if not final_path.endswith(os.path.sep):
            final_path += os.path.sep
    if final_path != value.path:
        value = parse.SplitResult(scheme=value.scheme,
                                  netloc=value.netloc,
                                  path=final_path,
                                  query=value.query,
                                  fragment=value.fragment)
    self.__url = value
def _get_base_url(self, url):
    """ Prepare the host url to get the root folder /."""
    parsed = URLParse.urlparse(url)
    if is_url(parsed):
        netloc, path = parsed.netloc, parsed.path
    else:
        # Not a recognisable URL: treat the raw input as the host.
        netloc, path = url, ""
    self.base_url = URLParse.SplitResult(scheme="http", netloc=netloc,
                                         path=path, query="", fragment="")
def update_actions(self, old_alarm=None):
    """Rewrite this alarm's trust-based action URLs to embed a trust ID.

    For every action whose URL is a trust URL, a trust ID (reused from
    *old_alarm* when available, otherwise newly created via Keystone) is
    inserted as the userinfo part of the netloc. Exactly one rewritten
    URL carries the ':delete' marker. Trust IDs from *old_alarm* that end
    up unused are deleted from Keystone.

    :param old_alarm: previous version of this alarm, if it is being
        updated; its existing trust IDs are candidates for reuse.
    :raises base.ClientSideError: if a trust URL already contains a
        trust ID (an '@' in its netloc).
    """
    # Identity of the caller, taken from the incoming request headers.
    trustor_user_id = pecan.request.headers.get('X-User-Id')
    trustor_project_id = pecan.request.headers.get('X-Project-Id')
    roles = pecan.request.headers.get('X-Roles', '')
    if roles:
        roles = roles.split(',')
    else:
        roles = []
    auth_plugin = pecan.request.environ.get('keystone.token_auth')

    if old_alarm:
        prev_trust_ids = set(old_alarm._get_existing_trust_ids())
    else:
        prev_trust_ids = set()
    # Reuse one previous trust ID if there is one; the rest are
    # candidates for deletion at the end.
    trust_id = prev_trust_ids.pop() if prev_trust_ids else None
    trust_id_used = False

    for actions in (self.ok_actions, self.alarm_actions,
                    self.insufficient_data_actions):
        if actions is not None:
            # Iterate over a copy so in-place reassignment below is safe.
            for index, action in enumerate(actions[:]):
                url = netutils.urlsplit(action)
                if self._is_trust_url(url):
                    if '@' in url.netloc:
                        errmsg = _("trust URL cannot contain a trust ID.")
                        raise base.ClientSideError(errmsg)
                    if trust_id is None:
                        # We have a trust action without a trust ID,
                        # create it
                        trust_id = keystone_client.create_trust_id(
                            pecan.request.cfg,
                            trustor_user_id, trustor_project_id, roles,
                            auth_plugin)
                    # Only the first rewritten URL gets the ':delete'
                    # marker (presumably so the trust is torn down once —
                    # TODO confirm against the notifier side).
                    if trust_id_used:
                        pw = ''
                    else:
                        pw = ':delete'
                        trust_id_used = True
                    netloc = '%s%s@%s' % (trust_id, pw, url.netloc)
                    url = urlparse.SplitResult(url.scheme, netloc,
                                               url.path, url.query,
                                               url.fragment)
                    actions[index] = url.geturl()

    # A reused-but-unneeded trust ID goes back into the deletion set.
    if trust_id is not None and not trust_id_used:
        prev_trust_ids.add(trust_id)
    for old_trust_id in prev_trust_ids:
        keystone_client.delete_trust_id(pecan.request.cfg, old_trust_id,
                                        auth_plugin)
def notify(self, action, alarm_id, alarm_name, severity, previous, current, reason, reason_data):
    """Deliver the alarm notification through a trust-authenticated client.

    The trust ID is carried as the URL's username; the fake userinfo and
    the 'trust+' scheme prefix are stripped before delegating to the
    parent notifier with an X-Auth-Token header.
    """
    trust_id = action.username
    client = keystone_client.get_trusted_client(self.conf, trust_id)

    # Remove the fake user from the netloc.
    real_netloc = action.netloc.split("@")[1]
    # Remove the trust prefix (first 6 chars, 'trust+') from the scheme.
    real_scheme = action.scheme[6:]
    rewritten = parse.SplitResult(real_scheme, real_netloc, action.path,
                                  action.query, action.fragment)

    headers = {'X-Auth-Token': keystone_client.get_auth_token(client)}
    super(TrustAlarmNotifierMixin, self).notify(
        rewritten, alarm_id, alarm_name, severity, previous, current,
        reason, reason_data, headers)
def open(url):
    """Parse a storage url, then locate and initialize a backend for it.

    Args:
        url: storage URL; a scheme-less string is treated as a local path
            and converted to a file:/// URL.

    Returns:
        An initialized ``Store`` instance from ``cumulus.store.<scheme>``.

    Raises:
        NotImplementedError: if no backend module exists for the scheme.

    Fixes: the final raise referenced an undefined name ``scheme``, so an
    unknown scheme produced a NameError instead of NotImplementedError.
    """
    parsed_url = urlparse.urlsplit(url)

    # If there is no scheme, fall back to treating the string as local path
    # and construct a file:/// URL.
    if not parsed_url.scheme:
        parsed_url = urlparse.SplitResult("file", "", quote(url), "", "")

    try:
        # TODO: Support a registry for schemes that don't map to a module.
        if re.match(r"^\w+$", parsed_url.scheme):
            handler = importlib.import_module("cumulus.store.%s"
                                              % parsed_url.scheme)
            obj = handler.Store(parsed_url)
            return obj
    except ImportError:
        # Fall through to error below
        pass

    raise NotImplementedError("Scheme %s not implemented" % parsed_url.scheme)
def add_query(self, pair_list):
    """Add or replace query parameters on ``self.url``.

    Parameters already present in the URL get their value replaced (the
    last matching pair in *pair_list* wins); names not already present
    are appended in *pair_list* order. Other parameters are untouched.
    """
    parts = up.urlsplit(self.url)
    existing = up.parse_qs(parts.query).keys()
    pairs = up.parse_qsl(parts.query)

    # Names from pair_list that are not yet in the query string.
    new_names = set()
    for cand_name, cand_value in pair_list:
        if cand_name not in existing:
            new_names.add(cand_name)

    rebuilt = []
    for name, value in pairs:
        # Replace the value if any pair matches; last match wins.
        for cand_name, cand_value in pair_list:
            if cand_name == name:
                value = cand_value
        rebuilt.append((name, value))
    for cand_name, cand_value in pair_list:
        if cand_name in new_names:
            rebuilt.append((cand_name, cand_value))

    self.url = up.urlunsplit(
        up.SplitResult(scheme=parts.scheme, netloc=parts.netloc,
                       path=parts.path, query=up.urlencode(rebuilt),
                       fragment=parts.fragment))
def read_configuration(self) -> 'JobLocation':
    """
    Read the job configuration file stored inside the container subvolume
    and apply it to this location.

    Sets ``self.location_type``, ``self.uuid``, ``self.retention`` and
    ``self.compress`` from the parsed file; for source locations it may
    also amend ``self.url`` and ``self.container_subvolume_relpath``.

    :return: Corresponding location (the counterpart destination for a
        source section, or the counterpart source for a destination
        section), or None if the file names no counterpart.
    :raises ValueError: if the section name is not a known location type.
    """
    # Read configuration file (via the location's remote/exec channel).
    out = self.exec_check_output('cat "%s"' % self.configuration_filename)
    file = out.decode().splitlines()

    corresponding_location = None

    parser = ConfigParser()
    parser.read_file(file)

    # Only the first section is considered.
    section = parser.sections()[0]

    # Section name implies location type
    if section == JobLocation.TYPE_SOURCE:
        location_type = JobLocation.TYPE_SOURCE
    elif section == JobLocation.TYPE_DESTINATION:
        location_type = JobLocation.TYPE_DESTINATION
    else:
        raise ValueError('invalid section name/location type [%s]' % section)

    # Parse config string values
    location_uuid = parser.get(section, self.__KEY_UUID, fallback=None)
    source = parser.get(section, self.__KEY_SOURCE, fallback=None)
    source_container = parser.get(section, self.__KEY_SOURCE_CONTAINER, fallback=None)
    destination = parser.get(section, self.__KEY_DESTINATION, fallback=None)
    # Keep has been renamed to retention.
    # Supporting the old name for backward compatibility.
    retention = parser.get(section, self.__KEY_RETENTION, fallback=None)
    if not retention:
        retention = parser.get(section, self.__KEY_KEEP, fallback=None)

    # Convert to instances where applicable
    location_uuid = UUID(location_uuid) if location_uuid else None
    source = parse.urlsplit(source) if source else None
    source_container = source_container if source_container else None
    destination = parse.urlsplit(destination) if destination else None
    retention = RetentionExpression(retention) if retention else None
    compress = True if distutils.util.strtobool(parser.get(section, self.__KEY_COMPRESS, fallback='False')) \
        else False

    if location_type == JobLocation.TYPE_SOURCE:
        # Amend url/container relpath from current path for source locations
        # if container relative path was not provided
        if not self.container_subvolume_relpath:
            source_container = os.path.basename(self.container_subvolume_path.rstrip(os.path.sep))
            # Rebuild the URL one directory up (the subvolume's parent).
            # NOTE(review): fragment=None here (not '') — presumably
            # harmless since geturl() treats it as falsy; confirm.
            source = parse.SplitResult(scheme=self.url.scheme,
                                       netloc=self.url.netloc,
                                       path=os.path.abspath(os.path.join(self.url.path, os.path.pardir)),
                                       query=self.url.query,
                                       fragment=None)
            self.url = source
            self.container_subvolume_relpath = source_container
        if destination:
            corresponding_location = JobLocation(destination,
                                                 location_type=JobLocation.TYPE_DESTINATION)
    elif location_type == JobLocation.TYPE_DESTINATION:
        if source:
            corresponding_location = JobLocation(source,
                                                 location_type=JobLocation.TYPE_SOURCE,
                                                 container_subvolume_relpath=source_container)

    self.location_type = location_type
    self.uuid = location_uuid
    self.retention = retention
    self.compress = compress

    return corresponding_location
"""Demonstrate assembling a URL from parts via SplitResult + urlunsplit."""
from urllib import parse

components = parse.SplitResult('http', 'docs.python.org', '/library',
                               'west=left', fragment='')
s = parse.urlunsplit(components)
print(s)
def make_abs_url(url):
    """Return *url* with its path made absolute (normalised); the query
    string and fragment are dropped."""
    parts = parse.urlsplit(url)
    normalized = parse.SplitResult(parts.scheme, parts.netloc,
                                   path.abspath(parts.path), '', '')
    return parse.urlunsplit(normalized)
def to_uri(self):
    """Serialise this object as a URI string, joining query parameters
    with ';' instead of '&'."""
    scheme, key, query = self._get_uri_scheme_path_and_query()
    query_string = parse.urlencode(query).replace("&", ";")
    return parse.SplitResult(scheme, "", key, query_string, "").geturl()