def prepare_request(self, method, uri, params=None, headers=None, data=None, json=None):
    """Build an HTTP request and record it on ``self.request_data``.

    :param method: HTTP verb; upper-cased before use.
    :param uri: path appended after api_server/endpoint/version.
    :param params: optional dict of query parameters.
    :param headers: optional dict merged over the default headers.
    :param data: raw body used for non-GET requests when ``json`` is None.
    :param json: object serialized as a JSON body for non-GET requests.
    :returns: the full URL (scheme + netloc + path + merged query).
    :raises TypeError: if ``params`` is not a dict.

    Side effects: populates self.request_data (host, uri, method,
    headers, body).
    """
    params = {} if params is None else params
    if not isinstance(params, dict):
        raise TypeError('params should be dict')
    method = method.upper()
    params = utf8_encoded_dict(params)
    # Base URL: <api_server>/<endpoint>/<version> + uri
    url = '/'.join([self.api_server, self.endpoint, self.version]) + uri.strip()
    logger.debug(url)
    url_parsed = urlparse(url)
    enc_params = urlencode(params)
    logger.debug(enc_params)
    # Merge any query string already embedded in the URL with the
    # encoded params.
    if url_parsed.query == '' or url_parsed.query is None:
        query = enc_params
    elif enc_params == '' or enc_params is None:
        query = url_parsed.query
    else:
        query = '%s&%s' % (url_parsed.query, enc_params)
    # real_uri drops scheme and netloc: path (+params) + query + fragment.
    real_uri = urlunparse(('', '', url_parsed.path, url_parsed.params,
                           query, url_parsed.fragment))
    real_url = urlunparse(
        (url_parsed.scheme, url_parsed.netloc, url_parsed.path,
         url_parsed.params, query, url_parsed.fragment))
    self.request_data.host = url_parsed.netloc
    self.request_data.uri = real_uri
    self.request_data.method = method
    self.request_data.headers = {
        'Accept': 'application/json; charset=utf-8'
    }
    if headers is not None:
        # headers is expected to be a dict
        self.request_data.headers.update(headers)
    if method == 'GET':
        self.request_data.body = ''
    else:
        if json is not None:
            self.request_data.headers[
                'Content-Type'] = 'application/json; charset=utf-8'
            self.request_data.body = json_util.dumps(json, ensure_ascii=False)
        else:
            self.request_data.body = data
    return real_url
def aws_federate(self):
    """Open the AWS web console through the federation endpoint.

    Exchanges the stored STS credentials for a federation SigninToken,
    builds the console login URL, and depending on flags either prints
    it, hands it to an already-opened tab via ``web_state``, or opens a
    new browser tab.

    :returns: the console login URL, or None if AWS was unreachable.
    """
    logger.debug("Attempting to open AWS console.")
    # Session payload expected by the federation getSigninToken action.
    creds = {
        "sessionId": self.credentials["AccessKeyId"],
        "sessionKey": self.credentials["SecretAccessKey"],
        "sessionToken": self.credentials["SessionToken"],
    }
    query = urlencode({
        "Action": "getSigninToken",
        "Session": json.dumps(creds),
    })
    logger.debug("Web Console params: {}".format(query))
    url_tuple = urlparse("https://signin.aws.amazon.com/federation")
    url = urlunparse(url_tuple._replace(query=query))
    try:
        token = requests.get(url).json()
    except requests.exceptions.ConnectionError as e:
        self.exit(
            "Unable to contact AWS to open web console : {}".format(e))
        return None
    # role ARN layout: arn:aws:iam::<account_id>:role/<role_name>
    account_id = self.role_arn.split(":")[4]
    account_alias = get_alias(self.role_map, account_id)
    role = self.role_arn.split(":")[5].split("/")[-1]
    issuer_url_query = urlencode({"account": account_alias, "role": role})
    issuer_url = urlunparse(
        ("https", self.issuer_domain, "/", "", issuer_url_query, ""))
    # Second federation call: the actual console login URL.
    query = urlencode({
        "Action": "login",
        "Destination": "https://console.aws.amazon.com/",
        "SigninToken": token["SigninToken"],
        "Issuer": issuer_url,
    })
    url = urlunparse(url_tuple._replace(query=query))
    logger.debug("Web browser console URL: {}".format(url))
    if self.print_url:
        print(url)
        self.state = "finished"
    elif self.opened_tab:
        # A tab is already waiting; let it pick the URL up from web_state.
        self.state = "aws_federate"
        self.web_state["awsFederationUrl"] = url
    else:
        self.opened_tab = True
        webbrowser.open_new_tab(url)
        self.state = "finished"
    return url
def build_url(self, params):
    """Assemble a signed AWIS request URL.

    **Args:**

    *params*: query parameters for the call; mutated in place to add the
    default parameters and the request signature.

    **Returns:**
        the complete URL that can be fed into the AWIS API

    """
    params.update(self.default_params())
    params['Signature'] = self.calculate_signature(params)
    query = self.canonicalized_query_string(params)
    return urlunparse(['http', AWIS_HOST, PATH, None, query, None])
def prepare_url(self):
    """Split ``self.raw_base_url`` into a base URL and its final path item.

    Side effects: sets ``self.base_url`` (scheme + host + parent path),
    ``self.first_item`` (last path component), and ``self.site_lang``
    when the last component carries a known server-side extension.
    Logs the derived values via ``logger``.
    """
    url_parsed = urlparse(self.raw_base_url)
    # str.split('/') always yields at least one element, so the last
    # component can be taken unconditionally (the previous `else`
    # branch guarded by len(items) > 0 was unreachable dead code).
    items = url_parsed.path.split('/')
    item = items[-1]
    new_path = '/'.join(items[:-1])
    url = urlunparse(
        (url_parsed.scheme, url_parsed.netloc, new_path, '', '', ''))
    # Infer the site's server-side language from the file extension.
    # '.aspx' does not match endswith('.asp'), so order is not critical,
    # but the original php -> asp -> aspx order is preserved.
    for suffix, lang in (('.php', 'php'), ('.asp', 'asp'), ('.aspx', 'aspx')):
        if item.endswith(suffix):
            self.site_lang = lang
            break
    if self.site_lang != '':
        logger.info('site_lang: %s' % self.site_lang)
    self.base_url = url
    self.first_item = item
    logger.info('base_url: %s' % url)
    logger.info('first_item: %s' % item)
def url_rewrite(self, task, entry):
    """Rewrite an eztv entry url to its torrent mirror links.

    Tries each known mirror host in turn until one serves the page,
    then collects the download mirror links found on it.
    """
    url = entry['url']
    page = None
    for scheme, netloc in EZTV_MIRRORS:
        # Re-point the url at the current mirror host.
        parsed = urlparse(url)
        url = urlunparse((scheme, netloc, parsed.path, parsed.params,
                          parsed.query, parsed.fragment))
        try:
            page = task.requests.get(url).content
        except RequestException:
            log.debug('Eztv mirror `%s` seems to be down', url)
            continue
        break
    if not page:
        raise UrlRewritingError('No mirrors found for url %s' % entry['url'])
    log.debug('Eztv mirror `%s` chosen', url)
    try:
        soup = get_soup(page)
        mirrors = soup.find_all('a',
                                attrs={'class': re.compile(r'download_\d')})
    except Exception as e:
        raise UrlRewritingError(e)
    log.debug('%d torrent mirrors found', len(mirrors))
    if not mirrors:
        raise UrlRewritingError(
            'Unable to locate download link from url %s' % url)
    entry['urls'] = [link.get('href') for link in mirrors]
    entry['url'] = mirrors[0].get('href')
def url_rewrite(self, task, entry):
    """Fetch the eztv page from the first reachable mirror and rewrite
    the entry to point at the torrent download mirrors it lists."""
    url = entry["url"]
    page = None
    for scheme, netloc in EZTV_MIRRORS:
        # Swap scheme/host for the current mirror, keep the rest.
        parts = urlparse(url)
        url = urlunparse((scheme, netloc) + tuple(parts)[2:])
        try:
            page = task.requests.get(url).content
        except RequestException:
            log.debug("Eztv mirror `%s` seems to be down", url)
            continue
        break
    if not page:
        raise UrlRewritingError("No mirrors found for url %s" % entry["url"])
    log.debug("Eztv mirror `%s` chosen", url)
    try:
        soup = get_soup(page)
        download_links = soup.find_all(
            "a", attrs={"class": re.compile(r"download_\d")})
    except Exception as e:
        raise UrlRewritingError(e)
    log.debug("%d torrent mirrors found", len(download_links))
    if not download_links:
        raise UrlRewritingError("Unable to locate download link from url %s" % url)
    entry["urls"] = [a.get("href") for a in download_links]
    entry["url"] = download_links[0].get("href")
def url_rewrite(self, task, entry):
    """Locate a live eztv mirror for the entry and replace its url(s)
    with the torrent download mirrors scraped from the page."""
    url = entry['url']
    page = None
    for mirror_scheme, mirror_host in EZTV_MIRRORS:
        _, _, path, params, query, fragment = urlparse(url)
        url = urlunparse(
            (mirror_scheme, mirror_host, path, params, query, fragment))
        try:
            page = task.requests.get(url).content
        except RequestException:
            log.debug('Eztv mirror `%s` seems to be down', url)
            continue
        break
    if page is None or not page:
        raise UrlRewritingError('No mirrors found for url %s' % entry['url'])
    log.debug('Eztv mirror `%s` chosen', url)
    try:
        soup = get_soup(page)
        mirrors = soup.find_all(
            'a', attrs={'class': re.compile(r'download_\d')})
    except Exception as e:
        raise UrlRewritingError(e)
    log.debug('%d torrent mirrors found', len(mirrors))
    if not mirrors:
        raise UrlRewritingError(
            'Unable to locate download link from url %s' % url)
    hrefs = []
    for mirror in mirrors:
        hrefs.append(mirror.get('href'))
    entry['urls'] = hrefs
    entry['url'] = hrefs[0]
def prepare_request(self, method, uri, params=None, headers=None, data=None, json=None):
    """Compose the request URL and populate ``self.request_data``.

    :param method: HTTP verb (upper-cased here).
    :param uri: path suffix joined after api_server/endpoint/version.
    :param params: optional dict of query parameters.
    :param headers: optional dict merged over the default headers.
    :param data: raw body for non-GET requests when ``json`` is None.
    :param json: object serialized to a JSON body for non-GET requests.
    :returns: the complete URL including the merged query string.
    :raises TypeError: if ``params`` is not a dict.
    """
    params = {} if params is None else params
    if not isinstance(params, dict):
        raise TypeError('params should be dict')
    method = method.upper()
    params = utf8_encoded_dict(params)
    url = '/'.join([self.api_server, self.endpoint, self.version]) + uri.strip()
    logger.debug(url)
    url_parsed = urlparse(url)
    enc_params = urlencode(params)
    logger.debug(enc_params)
    # Combine a query already present in the URL with the encoded params.
    if url_parsed.query == '' or url_parsed.query is None:
        query = enc_params
    elif enc_params == '' or enc_params is None:
        query = url_parsed.query
    else:
        query = '%s&%s' % (url_parsed.query, enc_params)
    # Scheme-less/host-less form for the request line.
    real_uri = urlunparse(('', '', url_parsed.path, url_parsed.params,
                           query, url_parsed.fragment))
    real_url = urlunparse((url_parsed.scheme, url_parsed.netloc,
                           url_parsed.path, url_parsed.params, query,
                           url_parsed.fragment))
    self.request_data.host = url_parsed.netloc
    self.request_data.uri = real_uri
    self.request_data.method = method
    self.request_data.headers = {
        'Accept': 'application/json; charset=utf-8'
    }
    if headers is not None:
        # headers is expected to be a dict
        self.request_data.headers.update(headers)
    if method == 'GET':
        self.request_data.body = ''
    else:
        if json is not None:
            self.request_data.headers['Content-Type'] = 'application/json; charset=utf-8'
            self.request_data.body = json_util.dumps(json, ensure_ascii=False)
        else:
            self.request_data.body = data
    return real_url
def redirect_to_terms_accept(current_path='/', slug='default'):
    """Redirect the user to the terms and conditions accept page."""
    parts = list(urlparse(ACCEPT_TERMS_PATH))
    # Non-default slugs are appended to the accept page's path.
    if slug != 'default':
        parts[2] += slug
    query = QueryDict(parts[4], mutable=True)
    query[TERMS_RETURNTO_PARAM] = current_path
    parts[4] = query.urlencode(safe='/')
    return HttpResponseRedirect(urlunparse(parts))
def __call__(self, r):
    """Modify the outgoing request: append the auth token to its
    query string, then return it."""
    scheme, netloc, path, params, query, fragment = urlparse(r.url)
    qs_args = parse_qs(query)
    qs_args["token"] = self._get_token()
    rebuilt_query = urlencode(qs_args, True)
    r.url = urlunparse(
        (scheme, netloc, path, params, rebuilt_query, fragment))
    return r
def encode_url_utf8(url):
    """Encode the path component of url to percent-encoded UTF8.

    The path is left untouched when it already contains percent-encoded
    entities; other URL components pass through unchanged.
    """
    parts = urlparse(url)
    path = parts.path
    # Assume the path is already encoded if percent escapes are present.
    if not re.search(r'%[0-9A-Fa-f]{2}', path):
        path = quote(path.encode('UTF8'), '/+')
    return urlunparse((parts.scheme, parts.netloc, path, parts.params,
                       parts.query, parts.fragment))
def _wrapped_view(request, *args, **kwargs):
    """Method to wrap the view passed in"""
    # Anonymous users and users who already agreed go straight through.
    if not request.user.is_authenticated() or TermsAndConditions.agreed_to_latest(request.user):
        return view_func(request, *args, **kwargs)
    # Otherwise bounce to the accept page, remembering where we came from.
    accept_parts = list(urlparse(ACCEPT_TERMS_PATH))
    query = QueryDict(accept_parts[4], mutable=True)
    query['returnTo'] = request.path
    accept_parts[4] = query.urlencode(safe='/')
    return HttpResponseRedirect(urlunparse(accept_parts))
def get_update_url(url, data):
    """Return *url* with its query string updated from *data*.

    :param url: original URL
    :param data: dict of query parameters to add or override
    :return: the rebuilt URL
    """
    parts = urlparse(url)
    merged = dict(parse_qsl(parts.query))
    merged.update(data)
    return urlunparse((parts.scheme, parts.netloc, parts.path,
                       parts.params, urlencode(merged), parts.fragment))
def __str__(self):
    """
    Generate URL from the recorded parameters.

    Returns
    -------
    str
    """
    # Copy the recorded parameters so instance state is not mutated.
    query = {}
    for key in self._parameters:
        query[key] = self._parameters[key]
    path = self._path
    if self._include_library_param:
        query["ixlib"] = "python-" + __version__
    # Web-proxy style path (a full URL used as the path) gets quoted
    # with a restricted safe set.
    if path.startswith("http"):
        try:
            path = quote(path, safe="~()*!.'")
        except KeyError:
            path = quote(path.encode('utf-8'), safe="~()*!.'")
    if not path.startswith("/"):
        path = "/" + path
    # Fix web proxy style URLs
    if not path.startswith("/http") and not self._str_is_ascii(path):
        try:
            path = quote(path)
        except KeyError:
            path = quote(path.encode('utf-8'))
    # NOTE: `query` is rebound here from a dict to its encoded string
    # form; keys are sorted so signing is deterministic.
    query = "&".join((quote(key, "") + "=" + quote(query[key], ""))
                     for key in sorted(query))
    if self._sign_key:
        # Signature covers sign_key + path + ("?" + query when non-empty).
        delim = "" if query == "" else "?"
        signing_value = self._sign_key + path + delim + query
        signature = hashlib.md5(signing_value.encode('utf-8')).hexdigest()
        if query:
            query += "&s=" + signature
        else:
            query = "s=" + signature
    return urlunparse([
        self._scheme,
        self._host,
        path,
        "",
        query,
        "",
    ])
def iriToUri(iri):
    """Change an IRI (internationalized R) to an URI.

    Used at MambuStruct.connect() method for any requests done to Mambu.

    Perfect example of unicode getting in the way.

    Illustrative (I hope) example: I have Mambu usernames with special
    chars in them. When retrieving them and then trying to build a
    MambuUser object with them, I get a BIG problem because of the
    unicode chars there. Using this I solved the problem.
    """
    def urlEncodeNonAscii(b):
        """Encode Non ASCII chars to URL-friendly chars.

        Sometimes unicode gets in the way. A shame, I know. And perhaps
        the biggest shame is not me correctly handling it.
        """
        import re
        # Replace every byte >= 0x80 with its %xx escape.
        return re.sub('[\x80-\xFF]',
                      lambda c: '%%%02x' % ord(c.group(0)), b)
    parts = urlparse.urlparse(iri)
    if sys.version_info < (3, 0):
        # python2
        partes = []
        for parti, part in enumerate(parts):
            try:
                if parti != 1:
                    # Non-netloc components: percent-encode non-ASCII bytes.
                    partes.append(urlEncodeNonAscii(part.encode('utf-8')))
                else:
                    # netloc (index 1): IDNA-encode the host name.
                    partes.append(part.encode('idna'))
            except UnicodeDecodeError as ue:
                # Fall back to latin decoding for already-encoded bytes.
                partes.append(urlEncodeNonAscii(part.decode('latin')))
            except Exception as e:
                raise e
        return urlparse.urlunparse(partes)
    else:
        # python3
        # NOTE(review): round-trips each component through utf8 bytes and
        # back; for plain-ASCII input this is effectively a no-op.
        uri = [part.decode('utf8') for parti, part in enumerate(parts.encode('utf8'))]
        return urlparse.urlunparse(uri)
def _serve_file(self, abspath, params):
    """Show a file.

    The actual content of the file is rendered by _handle_content.
    """
    relpath = os.path.relpath(abspath, self._root)
    breadcrumbs = self._create_breadcrumbs(relpath)
    # Use empty strings (not None) for the unused URL components:
    # urllib documents urlunparse as taking str components, and the
    # parallel implementation of this method in this file already uses
    # ''. The result is the same relative "relpath?query" link, without
    # relying on None merely being falsy inside urlunsplit.
    link_path = urlunparse(['', '', relpath, '', urlencode(params), ''])
    args = self._default_template_args('file.html')
    args.update({'root_parent': os.path.dirname(self._root),
                 'breadcrumbs': breadcrumbs,
                 'link_path': link_path})
    self._send_content(self._renderer.render_name('base.html', args),
                       'text/html')
def _serve_file(self, abspath, params):
    """Show a file.

    The actual content of the file is rendered by _handle_content.
    """
    relpath = os.path.relpath(abspath, self._root)
    crumbs = self._create_breadcrumbs(relpath)
    # Relative link back to this file carrying the given query params.
    link_path = urlunparse(['', '', relpath, '', urlencode(params), ''])
    template_args = self._default_template_args('file.html')
    template_args.update({
        'root_parent': os.path.dirname(self._root),
        'breadcrumbs': crumbs,
        'link_path': link_path,
    })
    rendered = self._renderer.render_name('base.html', template_args)
    self._send_content(rendered.encode("utf-8"), 'text/html')
def _wrapped_view(request, *args, **kwargs):
    """Method to wrap the view passed in"""
    # If user has not logged in, or if they have logged in and already
    # agreed to the terms, let the view through
    authenticated = request.user.is_authenticated()
    if not authenticated or not TermsAndConditions.get_active_terms_not_agreed_to(request.user):
        return view_func(request, *args, **kwargs)
    # Otherwise, redirect to terms accept
    redirect_parts = list(urlparse(ACCEPT_TERMS_PATH))
    qs = QueryDict(redirect_parts[4], mutable=True)
    qs['returnTo'] = request.path
    redirect_parts[4] = qs.urlencode(safe='/')
    return HttpResponseRedirect(urlunparse(redirect_parts))
def svnUriCanonicalize(uri):
    """Return a canonical form of an svn repository URI.

    Lower-cases the scheme and host, drops a trailing dot on the host,
    strips the scheme's default port, collapses redundant path segments
    ("..", ".", "//") for http/https/svn URIs, re-quotes the path, and
    removes a single trailing slash.
    """
    # One redundant-segment pattern; applied repeatedly below.
    collapse = re.compile(r'([^/]+/\.\./?|/\./|//|/\.$|/\.\.$|^/\.\.)')
    server_authority = re.compile(r'^(?:([^@]+)@)?([^:]+)(?::(.+))?$')
    default_port = {'http': '80', 'https': '443', 'svn': '3690'}
    relative_schemes = ['http', 'https', 'svn']

    def quote(uri):
        # svn's quoting rules; latin-1 keeps a 1:1 byte mapping.
        return urlquote(uri, "!$&'()*+,-./:=@_~", encoding="latin-1")

    if not uri or uri == '/':
        return uri
    (scheme, authority, path, parameters, query, fragment) = urlparse(uri)
    scheme = scheme.lower()
    if authority:
        mo = server_authority.match(authority)
        if not mo:
            return uri  # give up
        userinfo, host, port = mo.groups()
        # Strip a trailing dot from the host (DNS root form).
        if host[-1] == '.':
            host = host[:-1]
        authority = host.lower()
        if userinfo:
            authority = "%s@%s" % (userinfo, authority)
        # Keep an explicit port only when it differs from the default.
        if port and port != default_port.get(scheme, None):
            authority = "%s:%s" % (authority, port)
    if scheme in relative_schemes:
        last_path = path
        # Collapse one redundant segment per pass until a fixpoint.
        while True:
            path = collapse.sub('/', path, 1)
            if last_path == path:
                break
            last_path = path
    # Normalize quoting: unquote then re-quote with svn's safe set.
    path = quote(urlunquote(path))
    canonical_uri = urlunparse(
        (scheme, authority, path, parameters, query, fragment))
    if canonical_uri == '/':
        return canonical_uri
    elif canonical_uri[-1] == '/' and canonical_uri[-2] != '/':
        # Drop a single trailing slash.
        return canonical_uri[:-1]
    return canonical_uri
def _wrapped_view(request, *args, **kwargs):
    """Method to wrap the view passed in"""
    # is_authenticated changed from a method to a property across
    # Django versions, so dispatch on DJANGO_VERSION.
    if DJANGO_VERSION <= (2, 0, 0):
        user_authenticated = request.user.is_authenticated()
    else:
        user_authenticated = request.user.is_authenticated
    # Anonymous users, and users with no active terms left to agree to,
    # pass straight through to the wrapped view.
    if not user_authenticated or not TermsAndConditions.get_active_terms_not_agreed_to(request.user):
        return view_func(request, *args, **kwargs)
    # Otherwise redirect to the accept page, preserving the origin path.
    accept_parts = list(urlparse(ACCEPT_TERMS_PATH))
    query = QueryDict(accept_parts[4], mutable=True)
    query['returnTo'] = request.path
    accept_parts[4] = query.urlencode(safe='/')
    return HttpResponseRedirect(urlunparse(accept_parts))
def tidy_url(url): ''' Given a URL it does various checks before returning a tidied version suitable for calling. It may raise LinkInvalidError if the URL has a problem. ''' # Find out if it has unicode characters, and if it does, quote them # so we are left with an ascii string try: url = url.decode('ascii') except Exception: parts = list(urlparse(url)) parts[2] = quote(parts[2].encode('utf-8')) url = urlunparse(parts) url = str(url) # strip whitespace from url # (browsers appear to do this) url = url.strip() # Use urllib3 to parse the url ahead of time, since that is what # requests uses, but when it does it during a GET, errors are not # caught well try: parsed_url = urllib3.util.parse_url(url) except urllib3.exceptions.LocationParseError as e: raise LinkInvalidError(_('URL parsing failure: %s') % e) # Check we aren't using any schemes we shouldn't be. # Scheme is case-insensitive. if not parsed_url.scheme or not parsed_url.scheme.lower( ) in ALLOWED_SCHEMES: raise LinkInvalidError( _('Invalid url scheme. Please use one of: %s') % ' '.join(ALLOWED_SCHEMES)) if not parsed_url.host: raise LinkInvalidError( _('URL parsing failure - did not find a host name')) return url
def process_html(self, current_url, response):
    """Extract links from an HTML response and queue them for crawling.

    Script/style asset URLs are appended to the back of the task queue
    (with a speculative ``.map`` sourcemap probe pushed to the front);
    normalized http(s) anchor links are pushed to the front.
    """
    if response.body in (None, b'', ''):
        return
    content_type = response.headers.get('Content-Type', '')
    if 'text/html' in content_type.lower():
        soup = BeautifulSoup(response.body, 'html5lib')
        script_list = [t.get('src') for t in soup.find_all('script')]
        style_list = [t.get('href') for t in soup.find_all('link')]
        a_list = [t.get('href') for t in soup.find_all('a')]
        script_style_list = script_list + style_list
        # Drop tags without the relevant attribute and de-duplicate.
        script_style_list = set(
            [t for t in script_style_list if t is not None])
        a_list = set([t for t in a_list if t is not None])
        new_a_list = []
        for t in a_list:
            if t.startswith('#'):
                continue
            # Apache directory-listing column-sort links
            if t in ('?C=N;O=A', '?C=N;O=D', '?C=M;O=A', '?C=M;O=D',
                     '?C=D;O=A', '?C=D;O=D', '?C=S;O=A', '?C=S;O=D'):
                continue
            url = urljoin(current_url, t)
            url_parsed = urlparse(url)
            if url_parsed.scheme in ('http', 'https'):
                # Rebuild without params/fragment to normalize the URL.
                url = urlunparse(
                    (url_parsed.scheme, url_parsed.netloc, url_parsed.path,
                     '', url_parsed.query, ''))
                new_a_list.append(url)
        a_list = set(new_a_list)
        for t in script_style_list:
            url = urljoin(current_url, t)
            self.task_queue.append(url)
            # Probe for Webpack sourcemap files
            if (t.endswith('.js') or t.endswith('.css')) and not t.endswith('.map'):
                self.task_queue.appendleft(url + '.map')
        for url in a_list:
            self.task_queue.appendleft(url)
def add_key_to_url(url, scheme, key):
    """Redirects the user to the requested URL with the given key appended
    to the query parameters."""
    params = request.args.to_dict()
    params['view_only'] = key
    parsed = urlparse(url)
    changes = {'query': urlencode(params)}
    if scheme:
        changes['scheme'] = scheme
    if parsed.fragment:
        # Fragments should exists server side so this mean some one set up a
        # in the url
        # WSGI sucks and auto unescapes it so we just shove it back into the path with the escaped hash
        changes['path'] = '{}%23{}'.format(parsed.path, parsed.fragment)
        changes['fragment'] = ''
    return urlunparse(parsed._replace(**changes))
def landing_page(request, orequest, login_url=None, redirect_field_name=REDIRECT_FIELD_NAME):
    """
    The page shown when the user attempts to sign in somewhere using OpenID
    but is not authenticated with the site. For idproxy.net, a message telling
    them to log in manually is displayed.
    """
    # Stash the OpenID request in the session for after login; ignore
    # requests with no message attribute.
    try:
        request.session['OPENID_REQUEST'] = orequest.message.toPostArgs()
    except AttributeError:
        pass
    login_url = login_url or settings.LOGIN_URL
    next_path = request.get_full_path()
    parts = list(urlparse(login_url))
    if redirect_field_name:
        qs = SafeQueryDict(parts[4], mutable=True)
        qs[redirect_field_name] = next_path
        parts[4] = qs.urlencode(safe='/')
    return HttpResponseRedirect(urlunparse(parts))
def _url(self):
    """Build the https URL carrying ``self.xml`` as the query string."""
    encoded = urlencode({'xml': self.xml})
    return urlunparse(('https', self.host, self.path, '', encoded, ''))
def _canonicalize_uri(uri):
    """Return *uri* with its query parameters sorted, for stable
    comparison between otherwise-equivalent URIs."""
    parts = parse.urlparse(uri)
    query = parts.query
    if query:
        query = parse.urlencode(sorted(parse.parse_qsl(query)))
    return parse.urlunparse((parts.scheme, parts.netloc, parts.path,
                             parts.params, query, parts.fragment))
def prepare_request(self, method, uri, params=None, headers=None, data=None, json=None, access_token=None):
    """Build an HTTP request and record it on ``self.request_data``.

    :param method: HTTP verb; upper-cased before use.
    :param uri: path suffix ('' is treated as '/').
    :param params: optional dict of query parameters.
    :param headers: optional dict merged over the defaults (ignored
        unless it is a dict).
    :param data: raw body for POST/PUT requests when ``json`` is None.
    :param json: object serialized to a JSON body for POST/PUT requests.
    :param access_token: when given, sent via the X-Access-Token header.
    :returns: the full URL including the merged query string.
    :raises TypeError: if ``params`` is not a dict.
    """
    params = {} if params is None else params
    if not isinstance(params, dict):
        raise TypeError('params should be dict')
    if uri == '':
        uri = '/'
    method = method.upper()
    params = utf8_encoded_dict(params)
    url = '/'.join([self.api_server, self.endpoint, self.version]) + uri.strip()
    logger.debug(url)
    url_parsed = urlparse(url)
    enc_params = urlencode(params)
    logger.debug(enc_params)
    # Merge a query already present in the URL with the encoded params.
    if url_parsed.query == '' or url_parsed.query is None:
        query = enc_params
    elif enc_params == '' or enc_params is None:
        query = url_parsed.query
    else:
        query = '%s&%s' % (url_parsed.query, enc_params)
    # Scheme-less/host-less form for the request line.
    real_uri = urlunparse(('', '', url_parsed.path, url_parsed.params,
                           query, url_parsed.fragment))
    real_url = urlunparse(
        (url_parsed.scheme, url_parsed.netloc, url_parsed.path,
         url_parsed.params, query, url_parsed.fragment))
    self.request_data.uri = real_uri
    self.request_data.method = method
    self.request_data.headers = {
        # 'Accept': 'application/json; charset=utf-8',
        'Host': url_parsed.netloc
    }
    if headers is not None and isinstance(headers, dict):
        # headers must be a dict to be merged in
        self.request_data.headers.update(headers)
    if access_token is not None:
        self.request_data.headers[HEADER_X_ACCESS_TOKEN] = access_token
    self.request_data.body = ''
    # Only POST/PUT carry a body.
    if method in ['POST', 'PUT']:
        if json is not None:
            self.request_data.headers[
                'Content-Type'] = 'application/json; charset=utf-8'
            self.request_data.body = json_util.dumps(json)
        else:
            self.request_data.body = '' if data is None else data
    return real_url