def to_python(self, value):

    def split_url(url):
        """
        Return a list of url parts via urlparse.urlsplit(), or raise
        ValidationError for some malformed URLs.
        """
        try:
            return list(urlsplit(url))
        except ValueError:
            # urlparse.urlsplit can raise a ValueError with some
            # misformatted URLs.
            raise ValidationError(self.error_messages['invalid'], code='invalid')

    value = super().to_python(value)
    if value:
        url_fields = split_url(value)
        if not url_fields[0]:
            # If no URL scheme given, assume http://
            url_fields[0] = 'http'
        if not url_fields[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            url_fields[1] = url_fields[2]
            url_fields[2] = ''
            # Rebuild the url_fields list, since the domain segment may now
            # contain the path too.
            url_fields = split_url(urlunsplit(url_fields))
        value = urlunsplit(url_fields)
    return value

def encode(self, path, parameters=None):
    '''
    @see: EncoderPath.encode
    '''
    assert isinstance(path, (Path, str)), 'Invalid path %s' % path
    if isinstance(path, Path):
        assert isinstance(path, Path)
        url = deque()
        url.append(self.root)
        url.append('/'.join(path.toPaths(self.converterPath)))
        if self.extension:
            url.append('.')
            url.append(self.extension)
        elif path.node.isGroup:
            url.append('/')
        query = urlencode(parameters) if parameters else ''
        return urlunsplit((self.scheme, self.host, ''.join(url), query, ''))
    else:
        assert isinstance(path, str), 'Invalid path %s' % path
        if not path.strip().startswith('/'):
            # TODO: improve the relative path detection
            # This is an absolute path so we will return it as it is.
            return path
        # The path is relative to this server so we will convert it in an absolute path
        url = urlsplit(path)
        return urlunsplit((self.scheme, self.host, url.path, url.query, url.fragment))

def extract_password_row(self, row):
    res = ''
    hostname_split = urlparse.urlsplit(row[0])
    website = urlparse.urlunsplit(
        (hostname_split.scheme, hostname_split.netloc, "", "", "")).strip('\n')
    username = ''
    password = ''
    form_url = ''
    user_field = ''
    pass_field = ''
    form_url_split = urlparse.urlsplit(row[1])
    form_url = urlparse.urlunsplit(
        (form_url_split.scheme, form_url_split.netloc, "", "", "")).strip('\n')
    # print('\nusername = '******' password RAW = ', row[5])
    password = self.decode_password(row[5])
    try:
        username = row[3]
        try:
            password = self.decode_password(row[5])
            self.num_passwords += 1
        except:
            print('ERROR - password = ', '******')  # value redacted in source
    except:
        print('non password entry (blacklists - ignoring)')
    res = self.format_list_csv(
        [website, username, form_url, user_field, pass_field, password])
    return res

def zoom_article(self, ticket_id, article_id):
    art_descr = self.__db.article_description(article_id)
    if art_descr[4] & ART_TEXT:
        return eval(self.__db.article_message(article_id))
    self.echo("Zoom article:", ticket_id, article_id)
    url_beg = urlsplit(self.runtime.get("site"))[:3]
    params = (
        ("Action", "AgentTicketZoom"),
        ("Subaction", "ArticleUpdate"),
        ("TicketID", ticket_id),
        ("ArticleID", article_id),
        ("OTRSAgentInterface", self.runtime["OTRSAgentInterface"]))
    url = urlunsplit(url_beg + (urlencode(params), ""))
    pg = TicketsPage(self.core)
    page = pg.load(url)
    if page is None:
        return
    mail_header = page.get("mail_header", [])
    if "mail_src" in page:
        url = urlunsplit(url_beg[:2] + urlsplit(page["mail_src"])[2:])
        self.echo("Get message:", url)
        pg = MessagePage(self.core)
        try:
            mail_text = pg.load(url)
        except LoginError:
            mail_text = pg.login()
    else:
        mail_text = page["message_text"]
    if mail_header:
        mail_text.insert(0, ("\n",))
        for i in reversed(mail_header):
            mail_text.insert(0, ("%s\t%s\n" % i,))
    shrink_tupled_text(mail_text)
    self.__db.article_message(article_id, repr(mail_text))
    return mail_text

def oauth(self, req, credentials=None, params={}):
    # NOTE: While flickr supports HTTPS in its oauth endpoints, flickr
    # thinks that the HTTPS endpoints are being accessed via HTTP, and thus
    # constructs the signature base string accordingly, which
    # will hence not match the signature base string generated by
    # pyoauth1client. We solve this by replacing HTTPS with HTTP
    # when generating the signature base string, and then revert the change
    # after the base string is generated. This way the signature
    # base string will match the one generated by flickr even though
    # we are accessing the endpoints via HTTPS for ADDED SECURITY!!!111one
    x = urlsplit(req.url)
    if x.scheme == "https":
        # Remove the HTTPS Scheme
        https = True
        x = x._replace(scheme="http")
        req = req._replace(url=urlunsplit(x))
    else:
        https = False
    y = super().oauth(req, credentials, params)
    if https:
        # Add back the HTTPS scheme
        x = urlsplit(y.url)
        x = x._replace(scheme="https")
        y = y._replace(url=urlunsplit(x))
    return y

def clean_url(value):
    """
    Taken from Django's URLField, this helps to normalize URLs. Raises a
    ValueError if an invalid url is passed.

    Example:

    >>> clean_url("www.google.com")
    "http://www.google.com"

    >>> clean_url("_.com")
    Traceback (most recent call last):
        File "<stdin>", line 1, in <module>
    ValueError: Enter a valid URL.
    """
    if value:
        value = value.strip()
        value = value.encode('ascii', 'ignore').decode("utf-8")
        url_fields = list(urlsplit(value))
        if not url_fields[0]:
            # If no URL scheme given, assume http://
            url_fields[0] = 'http'
        if not url_fields[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            url_fields[1] = url_fields[2]
            url_fields[2] = ''
            # Rebuild the url_fields list, since the domain segment may now
            # contain the path too.
            url_fields = list(urlsplit(urlunsplit(url_fields)))
        if not url_fields[2]:
            # the path portion may need to be added before query params
            url_fields[2] = '/'
        value = urlunsplit(url_fields)
    return value

def encode(self, path, parameters=None): """ @see: EncoderPath.encode """ assert isinstance(path, (Path, str)), "Invalid path %s" % path if isinstance(path, Path): assert isinstance(path, Path) url = deque() url.append(self.root) url.append("/".join(path.toPaths(self.converterPath))) if self.extension: url.append(".") url.append(self.extension) elif path.node.isGroup: url.append("/") query = urlencode(parameters) if parameters else "" return urlunsplit((self.scheme, self.host, quote("".join(url)), query, "")) else: assert isinstance(path, str), "Invalid path %s" % path if not path.strip().startswith("/"): # TODO: improve the relative path detection # This is an absolute path so we will return it as it is. return quote(path) # The path is relative to this server so we will convert it in an absolute path url = urlsplit(path) return urlunsplit((self.scheme, self.host, quote(url.path), url.query, url.fragment))
def authorizeApplication(app_id, username, password):
    '''Authorize an application to access a system's data and get the user_id'''
    scheme = 'https'
    base_url = 'enlighten.enphaseenergy.com'
    action = 'app_user_auth/new'
    query = p.urlencode({'app_id': app_id})

    request1 = p.urlunsplit((scheme, base_url, action, query, ''))
    logging.debug(request1)

    opener = r.build_opener(r.HTTPCookieProcessor())
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    r1 = opener.open(request1)

    action, hiddens = EnphaseInterface._processPage(r1)
    payload = {'user[email]': username, 'user[password]': password}
    hiddens.update(payload)

    request2 = p.urlunsplit((scheme, base_url, action, query, ''))
    r2 = opener.open(request2, p.urlencode(hiddens).encode(encoding='UTF-8'))

    action, hiddens = EnphaseInterface._processPage(r2)
    request3 = p.urlunsplit((scheme, base_url, action, query, ''))
    r3 = opener.open(request3, p.urlencode(hiddens).encode(encoding='UTF-8'))

    if 'enlighten-api-user-id' not in r3.info():
        logging.critical('Failed to acquire user_id')
    logging.debug(r3.info()['enlighten-api-user-id'])
    return r3.info()['enlighten-api-user-id']

def to_python(self, value):

    def split_url(url):
        """
        Returns a list of url parts via ``urlparse.urlsplit`` (or raises a
        ``ValidationError`` exception for certain malformed URLs).
        """
        try:
            return list(urlsplit(url))
        except ValueError:
            # urlparse.urlsplit can raise a ValueError with some
            # misformatted URLs.
            raise ValidationError(self.error_messages['invalid'])

    value = super(URLField, self).to_python(value)
    if value:
        url_fields = split_url(value)
        if not url_fields[0]:
            # If no URL scheme given, assume http://
            url_fields[0] = 'http'
        if not url_fields[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            url_fields[1] = url_fields[2]
            url_fields[2] = ''
            # Rebuild the url_fields list, since the domain segment may now
            # contain the path too.
            url_fields = split_url(urlunsplit(url_fields))
        if not url_fields[2]:
            # the path portion may need to be added before query params
            url_fields[2] = '/'
        value = urlunsplit(url_fields)
    return value

def items(id: int = None) -> str:
    valid_params = {'1': True, '0': False}
    starred = valid_params.get(request.query.getone('starred'))
    read = valid_params.get(request.query.getone('read'))
    channel_ids = [int(i) for i in request.query.getlist('channel')]
    channel_ids += [id] if id is not None else []
    since_id = request.query.since_id
    max_id = request.query.max_id
    count = int(request.query.count) if request.query.count else 25
    page = int(request.query.page) if request.query.page else 1
    search = request.query.q

    query = Item.select()
    #for channel_id in channel_ids:
    if channel_ids:
        query = query.where(Item.channel << channel_ids)
    if starred:
        query = query.where(Item.starred == starred)
    if read:
        query = query.where(Item.read == read)
    if since_id:
        query = query.where(Item.id >= since_id)
    if max_id:
        query = query.where(Item.id <= max_id)
    if search:
        search = '%' + search + '%'
        query = query.where(Item.title ** search |
                            Item.description ** search |
                            Item.author ** search)
    #total_count = query.count()
    if page and count:
        query = query.paginate(page, count)

    for it in query:
        it.new = False
        it.save()

    out = {'items': list(query.order_by(Item.updated.desc()).limit(count))}

    channels = Channel.select().order_by(Channel.title)
    for c in channels:
        c.filter = True if c.id in channel_ids else False

    #if channel:
    #    Item.update(new=False).where(Item.channel == channel).execute()

    params = {}
    for p in request.query.keys():
        params[p] = request.query.getall(p)
    params['page'] = page + 1
    out['next'] = urlunsplit(('', '', request.fullpath,
                              urlencode(params, doseq=True), ''))
    params['page'] = page - 1 if page > 1 else 1
    out['prev'] = urlunsplit(('', '', request.fullpath,
                              urlencode(params, doseq=True), '')) if page > 1 else None

    if request_accept_json():
        return out
    else:
        return template('index', out, is_active=is_active, favicon=favicon,
                        date_format=date_format, channels=channels)

def assert_redirects_to(self, response, url_name, status_code=302,
                        target_status_code=200, host=None, msg_prefix='',
                        *args, **kwargs):
    '''
    Assert that the response is a redirect to a resolved url and that the
    URL can be loaded.

    It differs from Django TestCase.assertRedirects on the following points:

    - Takes a resolvable url name as parameter
    - Query params are not taken in account for URL comparison, only for
      status code retrieval.
    '''
    if msg_prefix:
        msg_prefix += ": "

    if hasattr(response, 'redirect_chain'):
        # The request was a followed redirect
        self.assertTrue(len(response.redirect_chain) > 0,
            msg_prefix + "Response didn't redirect as expected: Response"
            " code was %d (expected %d)" %
            (response.status_code, status_code))

        self.assertEqual(response.redirect_chain[0][1], status_code,
            msg_prefix + "Initial response didn't redirect as expected:"
            " Response code was %d (expected %d)" %
            (response.redirect_chain[0][1], status_code))

        url, status_code = response.redirect_chain[-1]

        self.assertEqual(response.status_code, target_status_code,
            msg_prefix + "Response didn't redirect as expected: Final"
            " Response code was %d (expected %d)" %
            (response.status_code, target_status_code))
    else:
        # Not a followed redirect
        self.assertEqual(response.status_code, status_code,
            msg_prefix + "Response didn't redirect as expected: Response"
            " code was %d (expected %d)" %
            (response.status_code, status_code))

        url = response['Location']
        scheme, netloc, path, query, fragment = urlsplit(url)
        url = urlunsplit((scheme, netloc, path, None, None))

        redirect_response = response.client.get(path, QueryDict(query))

        # Get the redirection page, using the same client that was used
        # to obtain the original response.
        self.assertEqual(redirect_response.status_code, target_status_code,
            msg_prefix + "Couldn't retrieve redirection page '%s':"
            " response code was %d (expected %d)" %
            (path, redirect_response.status_code, target_status_code))

    path = reverse(url_name, *args, **kwargs)
    expected_url = urlunsplit(('http', host or 'testserver', path, None, None))

    self.assertEqual(url, expected_url,
        msg_prefix + "Response redirected to '%s', expected '%s'" %
        (url, expected_url))

def compute_url_from_payload(self, data_item):
    url = data_item[DomFuzzerQueueTable.URL]
    target = data_item[DomFuzzerQueueTable.TARGET]
    param = data_item[DomFuzzerQueueTable.PARAM]
    test = data_item[DomFuzzerQueueTable.TEST]

    if 'url' == target:
        if not param:
            return url + test
        else:
            # TODO: fix me
            return url + test + '=X'

    splitted = urlparse.urlsplit(url)
    if 'fragment' == target:
        url_field = splitted.fragment
    elif 'query' == target:
        url_field = splitted.query
    else:
        raise Exception('unsupported target: %s' % (target))

    if not url_field:
        raise Exception('missing URL field in url: %s' % (url))
    else:
        # TODO: this duplicates previous work, so could consider pre-storing target urls?
        url_io = StringIO()
        pairs = self.re_delim.split(url_field)
        found = False
        for offset in range(0, len(pairs)):
            values = pairs[offset]
            if values == ';' or values == '&':
                url_io.write(values)
                continue
            if '=' in values:
                name, value = values.split('=', 1)
                separator = '='
            else:
                name, value = values, ''
                separator = ''
            if name == param:
                value += test
                found = True
            url_io.write(name)
            url_io.write(separator)
            url_io.write(value)

        if not found:
            url_io.write(test)

        if 'fragment' == target:
            target_url = urlparse.urlunsplit((splitted.scheme, splitted.netloc,
                                              splitted.path, splitted.query,
                                              url_io.getvalue()))
        elif 'query' == target:
            target_url = urlparse.urlunsplit((splitted.scheme, splitted.netloc,
                                              splitted.path, url_io.getvalue(),
                                              splitted.fragment))

    return target_url

def doEncodePath(path):
    '''
    Do encode the path.
    '''
    assert isinstance(path, str), 'Invalid path %s' % path
    url = urlsplit(path)
    if url.scheme or url.netloc:
        return urlunsplit((url.scheme, url.netloc, url.path, url.query, url.fragment))
    # Is a relative URI so we append the scheme and host.
    return urlunsplit((scheme, host, url.path, url.query, url.fragment))

def children(root, soup):
    """ Return a set of child URLs within an HTML soup,
        relative to the given root """
    # Establish the root URL base parameters
    root_s = urlparse.urlsplit(root.url)
    root_url = urlparse.urlunsplit(root_s)
    root_url_slash = urlparse.urlunsplit(
        (root_s.scheme, root_s.netloc, "/", root_s.query, "")
    )
    # Collect all interesting <a> tags from the soup and obtain their href-s:
    fetch = set()
    for link in soup.find_all("a"):
        href = link.get("href")
        if not href:
            continue
        # Split the href into its components
        s = urlparse.urlsplit(href)
        if s.scheme and s.scheme not in {"http", "https"}:
            # Not HTTP
            continue
        if s.netloc and not (
            s.netloc == root.domain or s.netloc.endswith("." + root.domain)
        ):
            # External domain - we're not interested
            continue
        # Seems to be a bug in urllib: fragments are put into the
        # path if there is no canonical path
        newpath = s.path
        if newpath.startswith("#") or newpath.startswith("/#"):
            newpath = ""
        if not newpath and not s.query:
            # No meaningful path info present
            continue
        # Make sure the newpath is properly urlencoded
        if newpath:
            newpath = urlparse.quote(newpath)
        # Fill in missing stuff from the root URL base parameters
        newurl = (
            s.scheme or root_s.scheme,
            s.netloc or root_s.netloc,
            newpath,
            s.query,
            ""
        )
        # Make a complete new URL to fetch
        url = urlparse.urlunsplit(newurl)
        if url in {root_url, root_url_slash}:
            # Exclude the root URL
            continue
        # Looks legit: add to the fetch set
        fetch.add(url)
    return fetch

def webfuzzer_populate_response_id(self, Id):
    self.clear_data_dictionary()

    row = self.Data.read_responses_by_id(self.cursor, Id)
    if not row:
        return

    responseItems = interface.data_row_to_response_items(row)

    url = responseItems[ResponsesTable.URL]
    reqHeaders = responseItems[ResponsesTable.REQ_HEADERS].decode('utf-8', 'ignore')
    reqData = responseItems[ResponsesTable.REQ_DATA].decode('utf-8', 'ignore')
    method = responseItems[ResponsesTable.REQ_METHOD]
    splitted = urlparse.urlsplit(url)

    # Rebuild the url keeping only the scheme, netloc and path
    # (query and fragment removed)
    base_url = urlparse.urlunsplit((splitted[0], splitted[1], splitted[2], '', ''))
    req_loc = ("", "", "", splitted.query, splitted.fragment)

    useragent = self.framework.useragent()
    has_cookie = False
    template = StringIO()
    template.write('${method} ${request_uri}%s HTTP/1.1\n' % urlparse.urlunsplit(req_loc))
    first = True
    for line in reqHeaders.splitlines():
        if not line:
            break
        if first and self.re_request.match(line):
            first = False
            continue
        if ':' in line:
            name, value = [v.strip() for v in line.split(':', 1)]
            lname = name.lower()
            if 'host' == lname:
                if splitted.hostname and value == splitted.hostname:
                    template.write('Host: ${host}\n')
                    continue
            elif 'user-agent' == lname:
                if useragent == value:
                    template.write('User-Agent: ${user_agent}\n')
                    continue
        template.write(line)
        template.write('\n')
    template.write('\n')
    template.write(reqData)

    self.set_combo_box_text(self.mainWindow.stdFuzzerReqMethod, method.upper())
    self.mainWindow.wfStdUrlEdit.setText(base_url)
    self.mainWindow.wfStdEdit.setPlainText(template.getvalue())

def do_populateExistingFuzzData(self):
    self.qlock.lock()
    try:
        rows = []
        dup_rows = []
        already_seen = {}
        for row in self.Data.get_dom_fuzzer_queue_items(self.read_cursor, 'P'):
            data_item = [m or '' for m in row]
            # check for duplicates and prioritize uniques first
            url = data_item[DomFuzzerQueueTable.URL]
            target = data_item[DomFuzzerQueueTable.TARGET]
            param = data_item[DomFuzzerQueueTable.PARAM]
            test = data_item[DomFuzzerQueueTable.TEST]
            # TODO: remove this
            # if 'fragment' == target or '#' == url[-1]:
            #     dup_rows.append(data_item)
            #     continue
            splitted = urlparse.urlsplit(url)
            if 'url' == target:
                dupcheck = urlparse.urlunsplit((splitted.scheme, splitted.netloc,
                                                splitted.path, '', ''))
            else:
                qs_values = None
                if 'query' == target and splitted.query:
                    qs_values = urlparse.parse_qs(splitted.query, True)
                    dupcheck = urlparse.urlunsplit((splitted.scheme, splitted.netloc,
                                                    splitted.path,
                                                    '&'.join(list(qs_values.keys())),
                                                    splitted.fragment))
                elif 'fragment' == target and splitted.fragment:
                    qs_values = urlparse.parse_qs(splitted.fragment, True)
                    dupcheck = urlparse.urlunsplit((splitted.scheme, splitted.netloc,
                                                    splitted.path, splitted.query,
                                                    '&'.join(list(qs_values.keys()))))
                else:
                    dupcheck = url
            dupcheck = '%s||%s||%s' % (dupcheck, param, test)
            if dupcheck not in already_seen:
                rows.append(data_item)
                already_seen[dupcheck] = True
            else:
                dup_rows.append(data_item)

        self.queueDataModel.append_data(rows)
        self.queueDataModel.append_data(dup_rows)

        rows = []
        for row in self.Data.read_dom_fuzzer_results_info(self.read_cursor):
            rows.append([m or '' for m in row])
        self.resultsDataModel.append_data(rows)
    finally:
        self.qlock.unlock()

def set_url_cred(url, username=None, password=None, _protocols=('http', 'https')):
    urlparts = list(urlsplit(url))
    if urlparts[0] not in _protocols:
        return url

    if '@' in urlparts[1]:
        urlparts[1] = urlparts[1].split('@')[-1]

    if username is None or password is None:
        return urlunsplit(urlparts)

    urlparts[1] = '%s:%s@%s' % (username, password, urlparts[1])
    return urlunsplit(urlparts)

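A minimal usage sketch for set_url_cred above; the URLs and credentials are illustrative, and the function plus its urlsplit/urlunsplit imports are assumed to be in scope.

# Hypothetical values, illustrating both directions of the helper.
assert set_url_cred('https://example.com/repo.git', 'user', 's3cret') == \
    'https://user:s3cret@example.com/repo.git'
# With no credentials given, any existing userinfo is stripped.
assert set_url_cred('https://old:pw@example.com/repo.git') == \
    'https://example.com/repo.git'
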
def url(self, name, force=False):
    """
    Returns the real URL in DEBUG mode.
    """
    if settings.DEBUG and not force:
        hashed_name, fragment = name, ''
    else:
        clean_name, fragment = urldefrag(name)
        if urlsplit(clean_name).path.endswith('/'):  # don't hash paths
            hashed_name = name
        else:
            cache_key = self.cache_key(name)
            hashed_name = self.cache.get(cache_key)
            if hashed_name is None:
                hashed_name = self.hashed_name(clean_name).replace('\\', '/')
                # set the cache if there was a miss
                # (e.g. if cache server goes down)
                self.cache.set(cache_key, hashed_name)

    final_url = super(CachedFilesMixin, self).url(hashed_name)

    # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
    # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
    query_fragment = '?#' in name  # [sic!]
    if fragment or query_fragment:
        urlparts = list(urlsplit(final_url))
        if fragment and not urlparts[4]:
            urlparts[4] = fragment
        if query_fragment and not urlparts[3]:
            urlparts[2] += '?'
        final_url = urlunsplit(urlparts)

    return unquote(final_url)

def hashed_name(self, name, content=None):
    parsed_name = urlsplit(unquote(name))
    clean_name = parsed_name.path.strip()
    opened = False
    if content is None:
        if not self.exists(clean_name):
            raise ValueError("The file '%s' could not be found with %r." %
                             (clean_name, self))
        try:
            content = self.open(clean_name)
        except IOError:
            # Handle directory paths and fragments
            return name
        opened = True
    try:
        file_hash = self.file_hash(clean_name, content)
    finally:
        if opened:
            content.close()
    path, filename = os.path.split(clean_name)
    root, ext = os.path.splitext(filename)
    if file_hash is not None:
        file_hash = ".%s" % file_hash
    hashed_name = os.path.join(path, "%s%s%s" % (root, file_hash, ext))
    unparsed_name = list(parsed_name)
    unparsed_name[2] = hashed_name
    # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
    # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
    if '?#' in name and not unparsed_name[3]:
        unparsed_name[2] += '?'
    return urlunsplit(unparsed_name)

def trace_node(self, stats, root):
    hostname = self.details.hostname or 'unknown'

    try:
        port = self.details.port
    except:
        port = None

    netloc = port and ('%s:%s' % (hostname, port)) or hostname

    method = self.method or ''

    name = 'External/%s/%s/%s' % (netloc, self.library, method)
    name = root.string_table.cache(name)

    start_time = newrelic.core.trace_node.node_start_time(root, self)
    end_time = newrelic.core.trace_node.node_end_time(root, self)

    children = []

    root.trace_node_count += 1

    params = {}

    details = self.details
    url = urlparse.urlunsplit((details.scheme, details.netloc,
                               details.path, '', ''))

    params['url'] = url

    return newrelic.core.trace_node.TraceNode(start_time=start_time,
            end_time=end_time, name=name, params=params,
            children=children, label=None)

def smart_urlquote(url):
    "Quotes a URL if it isn't already quoted."
    def unquote_quote(segment):
        segment = unquote(segment)
        # Tilde is part of RFC3986 Unreserved Characters
        # http://tools.ietf.org/html/rfc3986#section-2.3
        # See also http://bugs.python.org/issue16285
        segment = quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~'))
        return force_text(segment)

    # Handle IDN before quoting.
    try:
        scheme, netloc, path, query, fragment = urlsplit(url)
    except ValueError:
        # invalid IPv6 URL (normally square brackets in hostname part).
        return unquote_quote(url)

    try:
        netloc = netloc.encode('idna').decode('ascii')  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        return unquote_quote(url)

    if query:
        # Separately unquoting key/value, so as to not mix querystring separators
        # included in query values. See #22267.
        query_parts = [(unquote(q[0]), unquote(q[1]))
                       for q in parse_qsl(query, keep_blank_values=True)]
        # urlencode will take care of quoting
        query = urlencode(query_parts)

    path = unquote_quote(path)
    fragment = unquote_quote(fragment)

    return urlunsplit((scheme, netloc, path, query, fragment))

def next_page_url(url):
    """ returns current url with next page or page 1 """
    pu = urlsplit(url)
    qs = parse_qs(pu.query)
    if qs is None:
        qs = {'page': 1}
    elif not qs.get('page'):
        qs.update({'page': 1})
    else:
        try:
            p = int(qs['page'][0], base=10)
            p += 1
            qs['page'] = str(p)
        except Exception as E:
            msg = 'Invalid page url because {}'.format(E)
            logging.warning(msg)
            return url
    query = urlencode(qs, doseq=True)
    next_url = urlunsplit((pu.scheme, pu.netloc, pu.path, query, pu.fragment))
    return next_url

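A hedged usage sketch for next_page_url above, on illustrative URLs; assumes the urlsplit/parse_qs/urlencode/urlunsplit imports and logging are in scope.

# An existing page parameter is incremented.
assert next_page_url('https://example.com/items?page=2') == \
    'https://example.com/items?page=3'
# A URL without a page parameter gets page=1 added.
assert next_page_url('https://example.com/items') == \
    'https://example.com/items?page=1'
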
def _create_start_page(self):
    """
    This command creates a default start page, and returns the URL of
    this page.
    """
    tmpdir = GEOVIEW_SUBPATH
    data = """
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" \
        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml" >
     <head>
      <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
      <title>%(title)s</title>
     </head>
     <body >
       <H4>%(content)s%(gramps_home_url)s</H4>
     </body>
    </html>
    """ % {'height': 600,
           'title': _('Start page for the Html View'),
           'content': _('Type a webpage address at the top, and hit'
                        ' the execute button to load a webpage in this'
                        ' page\n<br>\nFor example: '),
           'gramps_home_url': "<b>%s</b>" % URL_HOMEPAGE}
    filename = os.path.join(tmpdir, 'startpage.html')
    # Now we have two views : Web and Geography, we need to create the
    # startpage only once.
    if not os.path.exists(filename):
        ufd = open(filename, "w+", encoding="utf-8")
        ufd.write(data)
        ufd.close()
    return urlunsplit(('file', '',
                       URL_SEP.join(filename.split(os.sep)),
                       '', ''))

def send(self, url, method='GET', data=None, headers={}):
    # for testing, URLs just need to have the path and query
    url_parsed = urlsplit(url)
    url = urlunsplit(('', '', url_parsed.path, url_parsed.query, url_parsed.fragment))

    # append the authentication headers to all requests
    headers = headers.copy()
    headers['Authorization'] = self.auth
    headers['Content-Type'] = 'application/json'
    headers['Accept'] = 'application/json'

    # convert JSON data to a string
    if data:
        data = json.dumps(data)

    # send request to the test client and return the response
    with self.app.test_request_context(url, method=method, data=data, headers=headers):
        rv = self.app.preprocess_request()
        if rv is None:
            rv = self.app.dispatch_request()
        rv = self.app.make_response(rv)
        rv = self.app.process_response(rv)
    return rv, json.loads(rv.data.decode('utf-8'))

def run_http_server(redirect_uri=None, modify_port=True, port_range=(10000, 10010)):
    """Returns (modified) redirect_uri"""
    from multiprocessing import Process, Pipe
    from urllib.parse import urlsplit, urlunsplit

    if redirect_uri is None:
        redirect_uri = "http://localhost"
    p = urlsplit(redirect_uri)
    # Ensure hostname is localhost or 127.0.0.1
    if p.hostname != "127.0.0.1" and p.hostname != "localhost":
        raise ValueError("url must have host of 127.0.0.1 or localhost! "
                         "Got: {}".format(p.hostname))
    if not modify_port:
        if p.port is not None:
            port_range = (int(p.port), int(p.port))
        else:
            port_range = (int(80), int(80))

    parent_port_pipe, child_port_pipe = Pipe()
    parent_pipe, child_pipe = Pipe()
    httpd_p = Process(target=_run_http_server,
                      args=(child_port_pipe, child_pipe, port_range))
    httpd_p.start()
    if parent_port_pipe.poll(3000):
        final_port = parent_port_pipe.recv()
    else:
        raise Exception("Timeout waiting for HTTP server process to start")
    if final_port == 0:
        # Could not find a port
        raise Exception("Could not find open port")

    netloc = "{0}:{1}".format(p.hostname, final_port)
    if p.path:
        path = p.path
    else:
        path = '/'
    p = p._replace(netloc=netloc, path=path)
    return (urlunsplit(p), parent_pipe, httpd_p)

def remove_utm_tags(guid):
    parts = list(urlsplit(guid))
    qs = parse_qs(parts[3])  # [3] is query component
    filtered = sorted([(k, v) for k, v in qs.items() if not k.startswith('utm_')])
    parts[3] = urlencode(filtered, doseq=True)
    return urlunsplit(parts)

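An illustrative sketch for remove_utm_tags above: non-utm parameters survive (re-sorted by key) while utm_* tracking tags are dropped.

assert remove_utm_tags('https://example.com/post?utm_source=rss&id=42') == \
    'https://example.com/post?id=42'
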
def _url(self, hashed_name_func, name, force=False, hashed_files=None):
    """
    Return the non-hashed URL in DEBUG mode.
    """
    if settings.DEBUG and not force:
        hashed_name, fragment = name, ''
    else:
        clean_name, fragment = urldefrag(name)
        if urlsplit(clean_name).path.endswith('/'):  # don't hash paths
            hashed_name = name
        else:
            args = (clean_name,)
            if hashed_files is not None:
                args += (hashed_files,)
            hashed_name = hashed_name_func(*args)

    final_url = super().url(hashed_name)

    # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
    # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
    query_fragment = '?#' in name  # [sic!]
    if fragment or query_fragment:
        urlparts = list(urlsplit(final_url))
        if fragment and not urlparts[4]:
            urlparts[4] = fragment
        if query_fragment and not urlparts[3]:
            urlparts[2] += '?'
        final_url = urlunsplit(urlparts)

    return unquote(final_url)

def update_querystring(url, querystringargs, ignore_none_values=True):
    """
    Update the querystring portion of the given ``url``.

    Parameters:
        querystringargs (dict): The querystring args to add/replace.
        ignore_none_values (bool): If this is ``True`` (default), we ignore
            ``None`` values in ``querystringargs``.

    Returns:
        The updated url.

    Examples:

        Add querystring argument::

            from django_cradmin import urlutils
            urlutils.update_querystring('http://example.com', {'search': 'test'})

        Update querystring argument::

            urlutils.update_querystring('http://example.com?search=something&page=2',
                                        {'search': 'updated'})
    """
    parsed_url = urlsplit(url)
    querydict = create_querydict(querystringargs=querystringargs,
                                 initial_query_string=parsed_url.query,
                                 ignore_none_values=ignore_none_values)
    return urlunsplit((
        parsed_url.scheme,
        parsed_url.netloc,
        parsed_url.path,
        querydict.urlencode(),
        parsed_url.fragment
    ))

def ML(URL, *args, _fragment=Undefined, **kwargs):
    # args is a list of 2-tuples (name, value) to be appended to the query string
    # kwargs is a mapping of name,value pairs which REPLACE the query string item(s) with that name
    url = list(urlsplit(URL))
    if _fragment is Undefined:
        pass
    elif _fragment is None:
        url[4] = None
    else:
        url[4] = str(_fragment)
    # qs = parse_qsl(url[3])
    qs = []
    for q in parse_qsl(url[3]):
        if q[0] not in ("EM", "IM"):
            qs.append((q[0], q[1]))
    for v in args:
        qs.append(v)
    # first filter out any keys from kwargs, then append the keys and values at the end
    qs = \
        [v for v in qs if v[0] not in kwargs] + \
        [(k, kwargs[k]) for k in sorted(kwargs) if kwargs[k] is not None]
    url[3] = urlencode(qs)
    return urlunsplit(url)

def raw_request(self, url, extra_post_data, method="GET"):
    scheme, netloc, path, query, fragment = urlsplit(url)
    post_data = None
    headers = self.http_headers
    method = method.upper()
    if extra_post_data or method == "POST":
        post_data = simplejson.dumps(extra_post_data)
        headers["Content-Length"] = str(len(post_data))
        headers["Authorization"] = "token %s" % self.access_token
    else:
        query = self.encode_authentication_data(parse_qs(query))
    url = urlunsplit((scheme, netloc, path, query, fragment))
    response, content = self._http.request(url, method, post_data, headers)
    if LOGGER.isEnabledFor(logging.DEBUG):
        logging.debug("URL: %r POST_DATA: %r RESPONSE_TEXT: %r",
                      url, post_data, content)
    if response.status >= 400:
        raise HttpError(
            "Unexpected response from github.com %d: %r"
            % (response.status, content),
            content, response.status)
    if response.status != 204:
        json = simplejson.loads(content.decode(charset_from_headers(response)))
    else:
        json = {"success": True}
    if "error" in json:
        raise self.GithubError(json["error"][0]["error"])
    return json

def _escape_unicode_chars(url):
    """Escape Unicode characters in the URL path"""
    url = list(parse.urlsplit(url))
    url[2] = parse.quote(url[2])
    return parse.urlunsplit(url)

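A sketch of _escape_unicode_chars above on an illustrative URL: only the path component is percent-encoded, the query is left untouched.

assert _escape_unicode_chars('https://example.com/über?q=1') == \
    'https://example.com/%C3%BCber?q=1'
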
def path_and_query(url, extra_query_params=()):
    split = urlsplit(url)
    query = '&'.join((split.query, *extra_query_params))
    return quote(urlunsplit(('', '', split.path.lstrip('/'), query, '')))

def urlescape(url):
    scheme, netloc, path, query, fragment = urlsplit(url)
    return urlunsplit((scheme, netloc, quote(path), query, fragment))

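A usage sketch for urlescape above (illustrative input); spaces in the path are quoted while scheme, host and query pass through unchanged.

assert urlescape('http://example.com/a b/c?x=1') == \
    'http://example.com/a%20b/c?x=1'
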
def get_commit_footer(self):
    portage_version = getattr(portage, "VERSION", None)
    gpg_key = self.repoman_settings.get("PORTAGE_GPG_KEY", "")
    dco_sob = self.repoman_settings.get("DCO_SIGNED_OFF_BY", "")
    report_options = []

    if self.options.force:
        report_options.append("--force")
    if self.options.ignore_arches:
        report_options.append("--ignore-arches")
    if self.scanner.include_arches is not None:
        report_options.append(
            "--include-arches=\"%s\"" %
            " ".join(sorted(self.scanner.include_arches)))

    if portage_version is None:
        sys.stderr.write("Failed to insert portage version in message!\n")
        sys.stderr.flush()
        portage_version = "Unknown"

    # Common part of commit footer
    commit_footer = ""
    for tag, bug in chain(
            (('Bug', x) for x in self.options.bug),
            (('Closes', x) for x in self.options.closes)):
        # case 1: pure number NNNNNN
        if bug.isdigit():
            bug = 'https://bugs.gentoo.org/%s' % (bug, )
        else:
            purl = urlsplit(bug)
            qs = parse_qs(purl.query)
            # case 2: long Gentoo bugzilla URL to shorten
            if (purl.netloc == 'bugs.gentoo.org' and
                    purl.path == '/show_bug.cgi' and
                    tuple(qs.keys()) == ('id', )):
                bug = urlunsplit(('https', purl.netloc,
                                  qs['id'][-1], '', purl.fragment))
            # case 3: bug tracker w/ http -> https
            elif (purl.scheme == 'http' and
                    purl.netloc in self.https_bugtrackers):
                bug = urlunsplit(('https', ) + purl[1:])
        commit_footer += "\n%s: %s" % (tag, bug)

    # Use new footer only for git (see bug #438364).
    if self.vcs_settings.vcs in ["git"]:
        commit_footer += "\nPackage-Manager: Portage-%s, Repoman-%s" % (
            portage.VERSION, VERSION)
        if report_options:
            commit_footer += "\nRepoMan-Options: " + " ".join(report_options)
        if self.repo_settings.sign_manifests:
            commit_footer += "\nManifest-Sign-Key: %s" % (gpg_key, )
    else:
        unameout = platform.system() + " "
        if platform.system() in ["Darwin", "SunOS"]:
            unameout += platform.processor()
        else:
            unameout += platform.machine()
        commit_footer += "\n(Portage version: %s/%s/%s" % \
            (portage_version, self.vcs_settings.vcs, unameout)
        if report_options:
            commit_footer += ", RepoMan options: " + " ".join(report_options)
        if self.repo_settings.sign_manifests:
            commit_footer += ", signed Manifest commit with key %s" % \
                (gpg_key, )
        else:
            commit_footer += ", unsigned Manifest commit"
        commit_footer += ")"

    if dco_sob:
        commit_footer += "\nSigned-off-by: %s" % (dco_sob, )

    return commit_footer

def get_base_url(url):
    """Get the base url of page"""
    split_url = urlsplit(url)
    base_url = urlunsplit((split_url.scheme, split_url.netloc, "", "", ""))
    return base_url

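A quick sketch of get_base_url above: path, query and fragment are discarded, leaving scheme://netloc.

assert get_base_url('https://example.com/a/b?q=1#top') == 'https://example.com'
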
def test_url_build(self):
    url = BaseURL('http://localhost:9000', None)
    eq_(
        urlunsplit(url.build("GET", None, bucket_name='bucket-name')),
        'http://localhost:9000/bucket-name',
    )
    eq_(
        urlunsplit(
            url.build("GET", None,
                      bucket_name='bucket-name', object_name='objectName'),
        ),
        'http://localhost:9000/bucket-name/objectName',
    )
    eq_(
        urlunsplit(
            url.build("GET", 'us-east-1',
                      bucket_name='bucket-name', object_name='objectName',
                      query_params={'foo': 'bar'}),
        ),
        'http://localhost:9000/bucket-name/objectName?foo=bar',
    )
    eq_(
        urlunsplit(
            url.build("GET", 'us-east-1',
                      bucket_name='bucket-name', object_name='objectName',
                      query_params={'foo': 'bar', 'b': 'c', 'a': 'b'}),
        ),
        'http://localhost:9000/bucket-name/objectName?a=b&b=c&foo=bar',
    )
    eq_(
        urlunsplit(
            url.build("GET", 'us-east-1',
                      bucket_name='bucket-name',
                      object_name='path/to/objectName/'),
        ),
        'http://localhost:9000/bucket-name/path/to/objectName/',
    )

    # S3 urls.
    url = BaseURL('https://s3.amazonaws.com', None)
    eq_(
        urlunsplit(url.build("GET", "us-east-1")),
        'https://s3.us-east-1.amazonaws.com/',
    )
    eq_(
        urlunsplit(
            url.build("GET", "eu-west-1", bucket_name='my.bucket.name'),
        ),
        'https://s3.eu-west-1.amazonaws.com/my.bucket.name',
    )
    eq_(
        urlunsplit(
            url.build("GET", 'us-west-2',
                      bucket_name='bucket-name', object_name='objectName'),
        ),
        'https://bucket-name.s3.us-west-2.amazonaws.com/objectName',
    )
    eq_(
        urlunsplit(
            url.build("GET", "us-east-1",
                      bucket_name='bucket-name', object_name='objectName',
                      query_params={'versionId': 'uuid'}),
        ),
        "https://bucket-name.s3.us-east-1.amazonaws.com"
        "/objectName?versionId=uuid",
    )

def to_url(self) -> str:
    query = urlencode(self._to_url_get_params())
    return urlunsplit((PARSEC_SCHEME, self._netloc,
                       quote_plus(self._to_url_get_path()), query, None))

def transform_url(options, url):
    # urlparse() returns a 6-tuple, but attribute access keeps this
    # compatible with the 5-part urlunsplit() (path params are dropped).
    parts = urlparse(url)
    return urlunsplit((parts.scheme, options["new_host"], parts.path,
                       parts.query, parts.fragment))

def _get_encoded_url(self) -> str:
    """Convert any UTF-8 char in :obj:`File.file_path` into a url encoded ASCII string."""
    sres = urllib_parse.urlsplit(self.file_path)
    return urllib_parse.urlunsplit(urllib_parse.SplitResult(
        sres.scheme, sres.netloc, urllib_parse.quote(sres.path),
        sres.query, sres.fragment))

def _get_url(self):
    self._build_url()
    return urlunsplit(
        (self.scheme, self.netloc, self.pathstr, self.querystr, ''))

def add_url_parameters(url, parameters):
    """ Add url parameters to URL. """
    scheme, netloc, path, query_string, fragment = urlsplit(url)
    query = parse_qs(query_string)
    query.update(parameters)
    # doseq=True so the list values produced by parse_qs are re-encoded as
    # repeated key=value pairs instead of their Python repr.
    return urlunsplit((scheme, netloc, path, urlencode(query, doseq=True), fragment))

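An illustrative use of add_url_parameters above; existing parameters are preserved and the new ones merged in.

assert add_url_parameters('https://example.com/s?q=test', {'page': '2'}) == \
    'https://example.com/s?q=test&page=2'
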
def html_url(doc_name):
    "Return the canonical URL for a document name."
    path = "/".join(["html", doc_name])
    return urlunsplit(["https", "tools.ietf.org", path, "", ""])

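For example, with an illustrative document name:

assert html_url('rfc3986') == 'https://tools.ietf.org/html/rfc3986'
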
def get_url(self):
    query = {'cat': self.name}
    query_string = urlencode(query)
    return urlunsplit(('', '', '/search', query_string, ''))

def update_openapi(schema: OpenAPIObject, exchange: HttpExchange, mode: UpdateMode) -> OpenAPIObject:
    """Update OpenAPI schema with a new request-response pair.
    Does not mutate the input schema.

    Schema reference: https://swagger.io/specification/#oasObject

    Returns:
        OpenAPI -- Updated schema
    """
    request_method = exchange.request.method.value
    request_path = exchange.request.pathname

    serverz = [] if schema.servers is None else schema.servers

    normalized_pathname_or_none = normalize_path_if_matches(
        exchange.request, serverz)
    if normalized_pathname_or_none is None:
        normalized_pathname = request_path
    else:
        normalized_pathname = normalized_pathname_or_none

    schema_paths = schema.paths
    operation_candidate = build_operation(exchange, mode)
    path_match_result = find_matching_path(
        normalized_pathname, schema_paths, request_method, operation_candidate)

    request_path_parameters = {}
    make_new_paths = True
    if path_match_result is not None:
        # Path item exists for request path
        (
            path_item,
            request_path_parameters,
            pathname_with_wildcard,
            pathname_to_be_replaced_with_wildcard,
        ) = (
            path_match_result.path,
            path_match_result.param_mapping,
            path_match_result.pathname_with_wildcard,
            path_match_result.pathname_to_be_replaced_with_wildcard,
        )
        # Create a wildcard if the mode is not replay/mixed
        if pathname_to_be_replaced_with_wildcard is not None:
            if mode == UpdateMode.GEN:
                # the algorithm has updated the pathname, need to mutate
                # the schema paths to use the new and discard the old if the old exists
                # in the schema. it would not exist if we have already put a wildcard
                pointer_to_value = schema_paths[pathname_to_be_replaced_with_wildcard]
                schema_paths = {
                    k: v
                    for k, v in [
                        (pathname_with_wildcard, pointer_to_value),
                        *schema_paths.items(),
                    ]
                    if k != pathname_to_be_replaced_with_wildcard
                }
                parameters_to_assign_to_pathname_with_wildcard = (
                    [] if schema_paths[pathname_with_wildcard].parameters is None
                    else schema_paths[pathname_with_wildcard].parameters)
                for path_param in request_path_parameters.keys():
                    params = [
                        x
                        for x in parameters_to_assign_to_pathname_with_wildcard
                        if not isinstance(x, Reference)
                    ]
                    if not (path_param in [x.name for x in params if x._in == "path"]):
                        parameters_to_assign_to_pathname_with_wildcard = [
                            Parameter(
                                required=True,
                                _in="path",
                                name=path_param,
                            ),
                            *parameters_to_assign_to_pathname_with_wildcard,
                        ]
                schema_paths = {
                    **schema_paths,
                    pathname_with_wildcard: replace(
                        schema_paths[pathname_with_wildcard],
                        parameters=parameters_to_assign_to_pathname_with_wildcard,
                    ),
                }
                make_new_paths = False  # because we've already done it above
            else:
                # we are using recordings, so we shouldn't overwrite anything
                # we only add if it is not there yet
                if normalized_pathname not in schema_paths:
                    # TODO: merge with lines below?
                    path_item = PathItem(summary="Path summary",
                                         description="Path description")
                    request_path_parameters = {}
    else:
        path_item = PathItem(summary="Path summary",
                             description="Path description")
        request_path_parameters = {}

    new_paths: Paths = {normalized_pathname: path_item}

    existing_operation = operation_from_string(path_item, request_method)
    if existing_operation is not None:
        # Operation exists
        operation = update_operation(existing_operation, exchange, mode)
    else:
        operation = operation_candidate

    path_item = new_path_item_at_operation(path_item, request_method, operation)

    new_paths = ({
        path_match_result.pathname_with_wildcard
        if (mode == UpdateMode.GEN)
        and ((path_match_result is not None)
             and (path_match_result.pathname_with_wildcard is not None))
        else normalized_pathname: path_item
    } if make_new_paths else {})

    new_server = Server(url=urlunsplit([
        str(exchange.request.protocol.value), exchange.request.host, "", "", ""
    ]))

    return replace(
        schema,
        paths={**schema_paths, **new_paths},
        servers=serverz
        if (new_server.url in [x.url for x in serverz])
        else [*serverz, new_server],
    )

#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from urllib.parse import urlunsplit

data = ['http', 'www.baidu.com', 'index.html', 'a=6', 'comment']
print(urlunsplit(data))

def _remove_anchor(url):
    """Remove the anchor from a URL."""
    scheme, netloc, path, qs, anchor = urlsplit(url)
    return urlunsplit((scheme, netloc, path, qs, ''))

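A sketch for _remove_anchor above: only the fragment is dropped.

assert _remove_anchor('https://example.com/page?x=1#section-2') == \
    'https://example.com/page?x=1'
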
from urllib.parse import urlsplit, urlunsplit

from django.core.exceptions import SuspiciousOperation
from django.conf import settings

BASE_ORIGIN = urlunsplit(urlsplit(settings.BASE_URL)._replace(path=''))
ALLOWED_ORIGINS = [BASE_ORIGIN]


class MissingOrigin(SuspiciousOperation):
    pass


class DisallowedOrigin(SuspiciousOperation):
    pass


class OriginMiddleware:
    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        if request.headers.cookie and request.method != 'GET':
            origin = request.headers.origin
            if not origin:
                referer = request.headers.referer
                if referer:
                    origin = urlunsplit(
                        urlsplit(referer)._replace(path='', query=''))

def assertRedirects(self, response, expected_url, status_code=302,
                    target_status_code=200, host=None, msg_prefix=''):
    """Asserts that a response redirected to a specific URL, and that the
    redirect URL can be loaded.

    Note that assertRedirects won't work for external links since it uses
    TestClient to do a request.
    """
    if msg_prefix:
        msg_prefix += ": "

    if hasattr(response, 'redirect_chain'):
        # The request was a followed redirect
        self.assertTrue(
            len(response.redirect_chain) > 0,
            msg_prefix + "Response didn't redirect as expected: Response"
            " code was %d (expected %d)" %
            (response.status_code, status_code))

        self.assertEqual(
            response.redirect_chain[0][1], status_code,
            msg_prefix + "Initial response didn't redirect as expected:"
            " Response code was %d (expected %d)" %
            (response.redirect_chain[0][1], status_code))

        url, status_code = response.redirect_chain[-1]

        self.assertEqual(
            response.status_code, target_status_code,
            msg_prefix + "Response didn't redirect as expected: Final"
            " Response code was %d (expected %d)" %
            (response.status_code, target_status_code))
    else:
        # Not a followed redirect
        self.assertEqual(
            response.status_code, status_code,
            msg_prefix + "Response didn't redirect as expected: Response"
            " code was %d (expected %d)" %
            (response.status_code, status_code))

        url = response['Location']
        scheme, netloc, path, query, fragment = urlsplit(url)

        redirect_response = response.client.get(path, QueryDict(query))

        # Get the redirection page, using the same client that was used
        # to obtain the original response.
        self.assertEqual(
            redirect_response.status_code, target_status_code,
            msg_prefix + "Couldn't retrieve redirection page '%s':"
            " response code was %d (expected %d)" %
            (path, redirect_response.status_code, target_status_code))

    e_scheme, e_netloc, e_path, e_query, e_fragment = urlsplit(expected_url)
    if not (e_scheme or e_netloc):
        expected_url = urlunsplit(
            ('http', host or 'testserver', e_path, e_query, e_fragment))

    self.assertEqual(
        url, expected_url,
        msg_prefix + "Response redirected to '%s', expected '%s'" %
        (url, expected_url))

def make_uri(scheme, netloc, url, query, fragment):
    return urlunsplit((scheme, netloc, url, query, fragment))

def add_params_to_urls(url, params):
    parts = urlsplit(url)
    query = parse_qsl(parts.query)
    query.extend(params.items())
    return urlunsplit([parts.scheme, parts.netloc, parts.path,
                       urlencode(query), parts.fragment])

def _build_url(self, path):
    return urlunsplit(["https", self.host_domain, path, "", ""])

def __base_uri(uri):
    (scheme, netloc, path, query, fragment) = urlsplit(uri)
    path = os.path.dirname(url2pathname(path)) + '/'
    return urlunsplit((scheme, netloc, path, query, fragment))

visitedURLs.append(URL)
print(f"\nScanning {URL}\n")
try:
    page = requests.get(URL, headers=headers)
    soup = BeautifulSoup(page.content, "html.parser")
    for a in soup.findAll("a"):
        link = a.get("href")
        link = urljoin(URL, link)
        scheme, netloc, path, query, fragment = parse.urlsplit(link)
        path = unquote(path)
        path = quote(path)
        link = parse.urlunsplit((scheme, netloc, path, query, fragment))
        if link:
            if link in visitedURLs:
                pass
            if link[-4:] == ".mp3" and link not in downloadedFiles:
                try:
                    download(link)
                except:
                    pass
                downloadedFiles.append(link)
            elif (
                link[:5] in ("index", "javas", "regis", "login")
                or "blog" in link
                or "facebook" in link

def __str__(self):
    val = self._val
    if not val.path and self.is_absolute() and (val.query or val.fragment):
        val = val._replace(path="/")
    return urlunsplit(val)

def update_url_with_ip(url, url_ip):
    # also make sure to use http and not https
    url_parts = list(urlsplit(url.strip()))
    url_parts[0] = 'http'
    url_parts[1] = url_ip
    return urlunsplit(url_parts)

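An illustrative sketch for update_url_with_ip above: the scheme is forced to http and the host is replaced by the given IP (any explicit port is lost too, since the whole netloc is overwritten).

assert update_url_with_ip('https://example.com/health?x=1', '10.0.0.5') == \
    'http://10.0.0.5/health?x=1'
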
def authenticate():
    now = datetime.datetime.utcnow()

    request_args = request.args.copy()
    suggested_userid = request_args.pop('userid', None)

    try:
        wls_req = AuthRequest.from_params_dict(request_args)
    except InvalidAuthRequest as e:
        return wls_fail(message=e)
    except ProtocolVersionUnsupported as e:
        return wls_fail(
            "The protocol version in use is not supported by this service")

    if not wls.have_mutual_auth_type(wls_req):
        if wls_req.fail:
            return wls_fail(
                "None of the requested authentication types are supported by this service"
            )
        else:
            wls_resp = wls.generate_failure(NO_MUTUAL_AUTH_TYPES, wls_req)
            return redirect(wls_resp.redirect_url)

    parts = urlsplit(wls_req.url)
    scheme = parts.scheme
    netloc = parts.netloc  # includes port number
    port = parts.port  # possibly None if it wasn't specified explicitly
    if port is None:
        match = re.match(DOMAIN_PORTLESS_RE, netloc)
    else:
        provisional_match = re.match(DOMAIN_WITH_PORT_RE, netloc)
        match = provisional_match if str(port) == provisional_match.group(2) else None

    if not match:
        return render_template('error.html',
                               message="Bad return host",
                               fail=False), 400
    else:
        domain = match.group(1)

    if not domain:
        return render_template('error.html',
                               message="No return domain specified",
                               fail=False), 400

    ctx = {
        'wls_req': wls_req,
        'domain': domain,
        'raven_handoff': urlunsplit([
            'https', 'raven.cam.ac.uk', '/auth/authenticate.html',
            request.query_string.decode(), ''
        ]),
        'suggested_userid': suggested_userid,
    }
    if wls_req.desc:
        ctx['desc_safe'] = wls_req.desc.replace('<', '&lt;').replace('>', '&gt;')
    if wls_req.msg:
        ctx['msg_safe'] = wls_req.msg.replace('<', '&lt;').replace('>', '&gt;')

    if scheme != 'https' and not app.config['ALLOW_INSECURE_WAA'] \
            and domain not in ('localhost', '127.0.0.1', '[::1]'):
        ctx['scheme'] = scheme
        return render_template('insecure_waa.html', **ctx), 400

    if netloc in app.config['BANNED_WAA_DOMAINS'] \
            or domain in app.config['BANNED_WAA_DOMAINS']:
        message = "Host %s is not authorised to use this service" % netloc
        if wls_req.fail:
            return wls_fail(message)
        else:
            wls_resp = wls.generate_failure(
                WAA_NOT_AUTHORISED,
                wls_req,
                msg=message,
                sign=False,
            )
            return redirect(wls_resp.redirect_url)

    if request.method == 'POST':
        username = request.form.get('userid').strip().lower() or None
        password = request.form.get('pwd') or None
        action = request.form.get('action') or None

        if action == 'cancel':
            if wls_req.fail:
                return wls_fail("The user cancelled authentication.")
            else:
                wls_resp = wls.generate_failure(USER_CANCEL, wls_req)
                return redirect(wls_resp.redirect_url)

        if not (username and password):
            err_msg = 'Missing username and/or password'
            ctx.update({
                'username': username,
                'err_msg': err_msg,
            })
            return render_template('authenticate.html', **ctx)

        if check_credentials(username, password):
            session['userid'] = username
            expiry = session['expiry'] = now + datetime.timedelta(hours=6)
            principal = construct_principal(username, expiry)
            wls_resp = wls.authenticate_active(wls_req, principal, 'pwd')
            return redirect(wls_resp.redirect_url)
        else:
            err_msg = "Unrecognised username or password"
            ctx.update({
                'username': username,
                'err_msg': err_msg,
            })
            return render_template('authenticate.html', **ctx)
    else:  # => request is GET
        saved_userid = session.get('userid')
        expiry = None
        expired = None
        if saved_userid:
            expiry = session.get('expiry')
            expired = expiry and (expiry < now)
        if saved_userid and not expired:
            # Previous session exists and hasn't expired
            if wls_req.iact is True:
                # Require re-authentication, force same userid
                ctx.update({
                    'session_existed': False,
                    'force_userid': saved_userid,
                })
                return render_template('authenticate.html', **ctx)
            else:
                # Passive authentication is possible and permissible
                principal = construct_principal(saved_userid, expiry)
                wls_resp = wls.authenticate_passive(wls_req, principal)
                return redirect(wls_resp.redirect_url)
        else:
            # Previous session has expired, or no session existed
            session_existed = (expiry is not None)
            if wls_req.iact is False:
                # We cannot authenticate passively but it is demanded, so return a failure
                if wls_req.fail:
                    return wls_fail(
                        "User interaction would be required for authentication, "
                        "but the web application demanded that authentication is completed "
                        "without it")
                else:
                    wls_resp = wls.generate_failure(INTERACTION_REQUIRED, wls_req)
                    return redirect(wls_resp.redirect_url)
            ctx.update({
                'session_existed': session_existed,
            })
            return render_template('authenticate.html', **ctx)

def expand_url(
    url,  # type: str
    base_url,  # type: str
    loadingOptions,  # type: LoadingOptions
    scoped_id=False,  # type: bool
    vocab_term=False,  # type: bool
    scoped_ref=None,  # type: Optional[int]
):
    # type: (...) -> str
    if url in ("@id", "@type"):
        return url

    if vocab_term and url in loadingOptions.vocab:
        return url

    if bool(loadingOptions.vocab) and ":" in url:
        prefix = url.split(":")[0]
        if prefix in loadingOptions.vocab:
            url = loadingOptions.vocab[prefix] + url[len(prefix) + 1:]

    split = urlsplit(url)

    if (
        (bool(split.scheme) and split.scheme in ["http", "https", "file"])
        or url.startswith("$(")
        or url.startswith("${")
    ):
        pass
    elif scoped_id and not bool(split.fragment):
        splitbase = urlsplit(base_url)
        frg = ""
        if bool(splitbase.fragment):
            frg = splitbase.fragment + "/" + split.path
        else:
            frg = split.path
        pt = splitbase.path if splitbase.path != "" else "/"
        url = urlunsplit((splitbase.scheme, splitbase.netloc, pt,
                          splitbase.query, frg))
    elif scoped_ref is not None and not bool(split.fragment):
        splitbase = urlsplit(base_url)
        sp = splitbase.fragment.split("/")
        n = scoped_ref
        while n > 0 and len(sp) > 0:
            sp.pop()
            n -= 1
        sp.append(url)
        url = urlunsplit(
            (
                splitbase.scheme,
                splitbase.netloc,
                splitbase.path,
                splitbase.query,
                "/".join(sp),
            )
        )
    else:
        url = loadingOptions.fetcher.urljoin(base_url, url)

    if vocab_term:
        split = urlsplit(url)
        if bool(split.scheme):
            if url in loadingOptions.rvocab:
                return loadingOptions.rvocab[url]
        else:
            raise ValidationException(f"Term '{url}' not in vocabulary")

    return url

result[0], result.netloc, result[1], sep='\n')

# urlunparse()
data = ['http', 'www.baidu.com', 'index.html', 'user', 'id=6', 'commant']
print(parse.urlunparse(data))

# urlsplit()
result1 = parse.urlsplit('http://www.baidu.com/index.html;user?id=5#comment')
print(result1)

# urlunsplit()
data1 = ['http', 'www.baidu.com', 'index.html', 'id=6', 'commant']
print(parse.urlunsplit(data1))

# urlencode()
params = {'name': 'vchase', 'age': '21'}
base_url = 'http://www.baidu.com?'
url = base_url + parse.urlencode(params)
print(url)

# parse_qs()
query = 'name=vchase&age=21'
print(parse.parse_qs(query))

# quote()
keyword = '壁纸'
url1 = 'https://www.baidu.com/s?wd=' + parse.quote(keyword)
print(url1)

def urlescape(self, url):
    scheme, netloc, path, qs, anchor = parse.urlsplit(url)
    path = parse.quote(path, '/%')
    qs = parse.quote_plus(qs, ':&=')
    return parse.urlunsplit((scheme, netloc, path, qs, anchor))

def urlPath(url):
    parts = urlsplit(url)
    return urlunsplit(('', '', parts.path, parts.query, parts.fragment))

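A usage sketch for urlPath above: scheme and netloc are stripped, giving a server-relative reference.

assert urlPath('https://example.com/a/b?x=1#frag') == '/a/b?x=1#frag'
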