import warnings
from urllib.parse import urlparse


def check_uri_security(uri):
    """Warns if the URL is insecure."""
    if urlparse(uri).scheme != 'https':
        warning_message = (
            'WARNING: this client is sending a request to an insecure'
            ' API endpoint. Any API request you make may expose your API key and'
            ' secret to third parties. Consider using the default endpoint:\n\n'
            '  %s\n') % uri
        warnings.warn(warning_message, UserWarning)
    return uri
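# A minimal usage sketch (endpoint URL is illustrative): a non-HTTPS URI
# triggers a UserWarning but is still returned unchanged, so callers can
# chain the check inline.
#
#   >>> check_uri_security('http://api.example.com/v2/')   # warns
#   'http://api.example.com/v2/'
#   >>> check_uri_security('https://api.example.com/v2/')  # silent
#   'https://api.example.com/v2/'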
from urllib.parse import ParseResult, urlparse, urlunparse


def join_uri(base, uri, construct=True):
    p_uri = urlparse(uri)

    # Return if there is nothing to join
    if not p_uri.path:
        return base

    scheme, netloc, path, params, query, fragment = urlparse(base)

    # Switch to 'uri' parts
    _, _, _, params, query, fragment = p_uri

    # join_path() is a helper defined elsewhere in the module
    path = join_path(path, p_uri.path)

    result = ParseResult(scheme, netloc, path, params, query, fragment)

    if not construct:
        return result

    # Construct from parts
    return urlunparse(result)
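# A hedged usage sketch: join_path() is assumed to join the two path
# components POSIX-style (an absolute second path wins), so the result keeps
# the base's scheme and host while taking path, query, and fragment from the
# second argument.
#
#   >>> join_uri('https://example.com/api?old=1', '/v2/items?page=2')
#   'https://example.com/v2/items?page=2'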
def get_full_url(self):
    # Only return the response's URL if the user hadn't set the Host header
    if not self._r.headers.get('Host'):
        return self._r.url
    # If they did set it, retrieve it and reconstruct the expected domain
    host = self._r.headers['Host']
    parsed = urlparse(self._r.url)
    # Reconstruct the URL as we expect it
    return urlunparse([
        parsed.scheme, host, parsed.path,
        parsed.params, parsed.query, parsed.fragment,
    ])
def _next_key_and_value(self):
    """
    Parse a single attribute type and value pair from one or more
    lines of LDIF data

    Returns attr_type (text) and attr_value (bytes)
    """
    # Reading new attribute line
    unfolded_line = self._unfold_lines()
    # Ignore comments which can also be folded
    while unfolded_line and unfolded_line[0] == '#':
        unfolded_line = self._unfold_lines()
    if not unfolded_line:
        return None, None
    if unfolded_line == '-':
        return '-', None
    try:
        colon_pos = unfolded_line.index(':')
    except ValueError:
        raise ValueError('no value-spec in %s' % (repr(unfolded_line)))
    attr_type = unfolded_line[0:colon_pos]
    # if needed, the attribute value is BASE64-decoded
    value_spec = unfolded_line[colon_pos:colon_pos + 2]
    if value_spec == ': ':
        attr_value = unfolded_line[colon_pos + 2:].lstrip()
        # All values should be valid ascii; we support UTF-8 as a
        # non-official, backwards compatibility layer.
        attr_value = attr_value.encode('utf-8')
    elif value_spec == '::':
        # attribute value needs base64-decoding;
        # base64 makes sense only for ascii
        attr_value = unfolded_line[colon_pos + 2:]
        attr_value = attr_value.encode('ascii')
        attr_value = self._b64decode(attr_value)
    elif value_spec == ':<':
        # fetch attribute value from URL
        url = unfolded_line[colon_pos + 2:].strip()
        attr_value = None
        if self._process_url_schemes:
            u = urlparse(url)
            if u[0] in self._process_url_schemes:
                attr_value = urlopen(url).read()
    else:
        # All values should be valid ascii; we support UTF-8 as a
        # non-official, backwards compatibility layer.
        attr_value = unfolded_line[colon_pos + 1:].encode('utf-8')
    return attr_type, attr_value
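# A standalone sketch (not part of the parser class) of the value-spec forms
# handled above, per RFC 2849: ": " for plain values, "::" for base64-encoded
# values, and ":<" for URL references. The sample attribute line is
# illustrative; the real method also unfolds continuation lines and skips
# comments via helpers on the class.
import base64

_sample = 'cn:: Sm9obiBTbWl0aA=='
_attr_type, _, _rest = _sample.partition(':')
if _rest.startswith(':'):
    # "::" form: the remainder is base64
    _attr_value = base64.b64decode(_rest[1:].strip())
assert (_attr_type, _attr_value) == ('cn', b'John Smith')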
from urllib.parse import parse_qs, urlparse


def gen_pager(current, count, pagesize, baseurl, seperator="page"):
    """
    current: current page index; should always be greater than 0

    return {
        "current": xxx,
        "previous": xxx,
        "next": xxx,
        "total_page": xxx,
    }
    """
    if current <= 0:
        raise Exception("current page should always be greater than 0!")
    total_page = count // pagesize + 1
    if count % pagesize == 0:
        total_page -= 1
    if total_page == 0:
        total_page = 1
    pager = {}
    pager["current"] = current
    pager["previous"] = current - 1 if current - 1 > 0 else None
    pager["next"] = current + 1 if current + 1 <= total_page else None
    pager["total_page"] = total_page
    pager["seperator"] = seperator

    # this is to make sure baseurl + "page=<int: page>" always makes a valid url
    frag = urlparse(baseurl)
    args = parse_qs(frag.query)
    # rebuild the query string, but ignore "page"
    query = "&".join("=".join((k, args[k][0])) for k in args if k != seperator)
    baseurl = frag.path
    if query:
        baseurl += "?" + query + "&"
    else:
        baseurl += "?"
    pager["baseurl"] = baseurl
    return pager
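# A quick check of the pager math and URL rebuilding above: 23 items at 10 per
# page gives 3 pages, and the rebuilt "baseurl" drops any existing "page"
# argument so "page=<n>" can always be appended safely. Path and query here
# are illustrative.
_pager = gen_pager(2, 23, 10, '/items?sort=asc&page=9')
assert (_pager['previous'], _pager['next'], _pager['total_page']) == (1, 3, 3)
assert _pager['baseurl'] == '/items?sort=asc&'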
def parse_url(url):
    # unicode() is the Python 2 text type; get_encoded() is a helper defined
    # elsewhere in the module
    try:
        url = unicode(url)
    except UnicodeDecodeError:
        pass

    parsed = urlparse(url)
    if not (parsed.scheme and parsed.netloc):
        raise ValueError("invalid URL, no scheme supplied: %r" % url)

    try:
        dec_netloc = parsed.netloc.encode('idna').decode('utf-8')
        parsed = parsed._replace(netloc=dec_netloc)
    except UnicodeError:
        raise ValueError('invalid characters in url: %r' % parsed.netloc)

    if not parsed.path:
        parsed = parsed._replace(path=u'/')

    for k, v in parsed._asdict().items():
        parsed = parsed._replace(**{k: get_encoded(v)})
    return parsed
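# A hedged usage sketch (assuming get_encoded() passes ASCII text through
# unchanged): the netloc is IDNA-encoded, so an internationalized hostname
# comes back in its ASCII-compatible "punycode" form.
#
#   >>> parse_url(u'http://bücher.example/shelf').netloc
#   'xn--bcher-kva.example'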
def build_digest_header(self, method, url):
    realm = self.chal['realm']
    nonce = self.chal['nonce']
    qop = self.chal.get('qop')
    algorithm = self.chal.get('algorithm')
    opaque = self.chal.get('opaque')
    hash_utf8 = None  # stays None for unrecognized algorithms

    if algorithm is None:
        _algorithm = 'MD5'
    else:
        _algorithm = algorithm.upper()
    # lambdas assume digest modules are imported at the top level
    if _algorithm == 'MD5' or _algorithm == 'MD5-SESS':
        def md5_utf8(x):
            if isinstance(x, str):
                x = x.encode('utf-8')
            return hashlib.md5(x).hexdigest()
        hash_utf8 = md5_utf8
    elif _algorithm == 'SHA':
        def sha_utf8(x):
            if isinstance(x, str):
                x = x.encode('utf-8')
            return hashlib.sha1(x).hexdigest()
        hash_utf8 = sha_utf8

    KD = lambda s, d: hash_utf8("%s:%s" % (s, d))

    if hash_utf8 is None:
        return None

    # XXX not implemented yet
    entdig = None
    p_parsed = urlparse(url)
    path = p_parsed.path
    if p_parsed.query:
        path += '?' + p_parsed.query

    A1 = '%s:%s:%s' % (self.username, realm, self.password)
    A2 = '%s:%s' % (method, path)

    HA1 = hash_utf8(A1)
    HA2 = hash_utf8(A2)

    if nonce == self.last_nonce:
        self.nonce_count += 1
    else:
        self.nonce_count = 1
    ncvalue = '%08x' % self.nonce_count
    s = str(self.nonce_count).encode('utf-8')
    s += nonce.encode('utf-8')
    s += time.ctime().encode('utf-8')
    s += os.urandom(8)

    cnonce = hashlib.sha1(s).hexdigest()[:16]
    if _algorithm == 'MD5-SESS':
        HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

    if qop is None:
        respdig = KD(HA1, "%s:%s" % (nonce, HA2))
    elif qop == 'auth' or 'auth' in qop.split(','):
        noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', HA2)
        respdig = KD(HA1, noncebit)
    else:
        # XXX handle auth-int.
        return None

    self.last_nonce = nonce

    # XXX should the partial digests be encoded too?
    base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
           'response="%s"' % (self.username, realm, nonce, path, respdig)
    if opaque:
        base += ', opaque="%s"' % opaque
    if algorithm:
        base += ', algorithm="%s"' % algorithm
    if entdig:
        base += ', digest="%s"' % entdig
    if qop:
        base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)

    return 'Digest %s' % (base)
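# A sanity check against the worked example in RFC 2617, section 3.5 (MD5,
# qop=auth): the same HA1/HA2/response construction used above reproduces the
# response digest given in the spec. All literal values below come from the
# RFC's example.
import hashlib


def _md5_hex(x):
    return hashlib.md5(x.encode('utf-8')).hexdigest()


_HA1 = _md5_hex('Mufasa:testrealm@host.com:Circle Of Life')
_HA2 = _md5_hex('GET:/dir/index.html')
_resp = _md5_hex('%s:%s:%s:%s:%s:%s' % (
    _HA1, 'dcd98b7102dd2f0e8b11d0f600bfb0c093', '00000001', '0a4f113b',
    'auth', _HA2))
assert _resp == '6629fae49393a05397450978507c4ef1'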
def get_host(self):
    return urlparse(self._r.url).netloc
def __init__(self, request):
    self._r = request
    self._new_headers = {}
    self.type = urlparse(self._r.url).scheme
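# get_full_url(), get_host(), and __init__() above read like methods of a
# urllib2-style request wrapper (the kind used to present a prepared request
# to cookielib). A hedged usage sketch, with the enclosing class name assumed:
#
#   wrapped = RequestWrapper(prepared_request)  # hypothetical class name
#   wrapped.type             # scheme of the wrapped request's URL
#   wrapped.get_host()       # netloc of the wrapped request's URL
#   wrapped.get_full_url()   # honors an explicit Host header if one was set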
def cfcookie(netloc, ua, timeout):
    # Python 2 code: relies on urllib2, cookielib, re, time, quote_plus, and
    # the module-level helper parseJSString()
    try:
        headers = {'User-Agent': ua}

        req = urllib2.Request(netloc, headers=headers)

        # the challenge page is expected to come back as an HTTP error;
        # a plain 200 leaves `result` unset and falls into the outer except
        try:
            urllib2.urlopen(req, timeout=int(timeout))
        except urllib2.HTTPError as response:
            result = response.read(5242880)

        jschl = re.findall('name="jschl_vc" value="(.+?)"/>', result)[0]
        init = re.findall(r'setTimeout\(function\(\){\s*.*?.*:(.*?)};', result)[-1]
        builder = re.findall(r"challenge-form\'\);\s*(.*)a.v", result)[0]

        decryptVal = parseJSString(init)

        lines = builder.split(';')
        for line in lines:
            if len(line) > 0 and '=' in line:
                sections = line.split('=')
                line_val = parseJSString(sections[1])
                decryptVal = int(eval(
                    str(decryptVal) + str(sections[0][-1]) + str(line_val)))

        answer = decryptVal + len(urlparse(netloc).netloc)

        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (
            netloc, jschl, answer)

        if 'type="hidden" name="pass"' in result:
            passval = re.findall('name="pass" value="(.*?)"', result)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % (
                netloc, quote_plus(passval), jschl, answer)
            time.sleep(5)

        cookies = cookielib.LWPCookieJar()
        handlers = [urllib2.HTTPHandler(), urllib2.HTTPSHandler(),
                    urllib2.HTTPCookieProcessor(cookies)]
        opener = urllib2.build_opener(*handlers)
        urllib2.install_opener(opener)

        try:
            req = urllib2.Request(query, headers=headers)
            urllib2.urlopen(req, timeout=int(timeout))
        except BaseException:
            pass

        cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])

        return cookie
    except BaseException:
        pass
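# A hedged usage sketch of the flow above: fetch the Cloudflare challenge
# page, solve the JS arithmetic challenge, replay it via /cdn-cgi/l/chk_jschl,
# and return the clearance cookies as a header-ready string. Host and UA
# below are illustrative.
#
#   clearance = cfcookie('http://example.com', 'Mozilla/5.0', 30)
#   headers = {'User-Agent': 'Mozilla/5.0', 'Cookie': clearance}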
def request(url, close=True, redirect=True, error=False, proxy=None, post=None,
            headers=None, mobile=False, limit=None, referer=None, cookie=None,
            output='', timeout='30', username=None, password=None):
    # Python 2/3 bridge code: is_py2/is_py3, basestring, randomagent() and
    # cfcookie() are defined elsewhere in the module
    if isinstance(post, dict):
        if is_py2:
            post = urlencode(post)
        elif is_py3:
            post = bytes(urlencode(post), encoding='utf-8')
    elif isinstance(post, basestring) and is_py3:
        post = bytes(post, encoding='utf-8')

    try:
        handlers = []

        if username is not None and password is not None and not proxy:
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            passmgr.add_password(None, uri=url, user=username, passwd=password)
            handlers += [urllib2.HTTPBasicAuthHandler(passmgr)]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)

        if proxy is not None:
            if username is not None and password is not None:
                passmgr = urllib2.ProxyBasicAuthHandler()
                passmgr.add_password(None, uri=url, user=username, passwd=password)
                handlers += [urllib2.ProxyHandler({'http': '{0}'.format(proxy)}),
                             urllib2.HTTPHandler,
                             urllib2.ProxyBasicAuthHandler(passmgr)]
            else:
                handlers += [urllib2.ProxyHandler({'http': '{0}'.format(proxy)}),
                             urllib2.HTTPHandler]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)

        if output == 'cookie' or output == 'extended' or close is not True:
            cookies = cookielib.LWPCookieJar()
            handlers += [urllib2.HTTPHandler(), urllib2.HTTPSHandler(),
                         urllib2.HTTPCookieProcessor(cookies)]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)

        try:
            # skip the unverified-SSL-context setup on interpreters newer
            # than 2.7.9
            if (2, 7, 9) < sys.version_info:
                raise BaseException
            import ssl
            try:
                import _ssl
                CERT_NONE = _ssl.CERT_NONE
            except ImportError:
                CERT_NONE = ssl.CERT_NONE
            ssl_context = ssl.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = CERT_NONE
            handlers += [urllib2.HTTPSHandler(context=ssl_context)]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)
        except BaseException:
            pass

        try:
            headers.update(headers)  # raises if headers is None
        except BaseException:
            headers = {}

        if 'User-Agent' in headers:
            pass
        elif mobile is not True:
            # headers['User-Agent'] = agent()
            headers['User-Agent'] = randomagent()
        else:
            headers['User-Agent'] = 'Apple-iPhone/701.341'

        if 'Referer' in headers:
            pass
        elif referer is None:
            headers['Referer'] = '%s://%s/' % (urlparse(url).scheme,
                                               urlparse(url).netloc)
        else:
            headers['Referer'] = referer

        if 'Accept-Language' not in headers:
            headers['Accept-Language'] = 'en-US'

        if 'Cookie' in headers:
            pass
        elif cookie is not None:
            headers['Cookie'] = cookie

        if redirect is False:
            class NoRedirectHandler(urllib2.HTTPRedirectHandler):
                def http_error_302(self, reqst, fp, code, msg, head):
                    infourl = addinfourl(fp, head, reqst.get_full_url())
                    infourl.status = code
                    infourl.code = code
                    return infourl

                http_error_300 = http_error_302
                http_error_301 = http_error_302
                http_error_303 = http_error_302
                http_error_307 = http_error_302

            opener = urllib2.build_opener(NoRedirectHandler())
            urllib2.install_opener(opener)

            try:
                del headers['Referer']
            except Exception:
                pass

        req = urllib2.Request(url, data=post, headers=headers)

        try:
            response = urllib2.urlopen(req, timeout=int(timeout))
        except urllib2.HTTPError as response:
            if response.code == 503:
                if 'cf-browser-verification' in response.read(5242880):
                    netloc = '%s://%s' % (urlparse(url).scheme,
                                          urlparse(url).netloc)
                    cf = cfcookie(netloc, headers['User-Agent'], timeout)
                    headers['Cookie'] = cf
                    req = urllib2.Request(url, data=post, headers=headers)
                    response = urllib2.urlopen(req, timeout=int(timeout))
                elif error is False:
                    return
            elif error is False:
                return

        if output == 'cookie':
            try:
                result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except BaseException:
                pass
            try:
                result = cf
            except BaseException:
                pass
        elif output == 'response':
            if limit == '0':
                result = (str(response.code), response.read(224 * 1024))
            elif limit is not None:
                result = (str(response.code), response.read(int(limit) * 1024))
            else:
                result = (str(response.code), response.read(5242880))
        elif output == 'chunk':
            try:
                content = int(response.headers['Content-Length'])
            except BaseException:
                content = (2049 * 1024)
            if content < (2048 * 1024):
                return
            result = response.read(16 * 1024)
        elif output == 'extended':
            try:
                cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except BaseException:
                pass
            try:
                cookie = cf
            except BaseException:
                pass
            content = response.headers
            result = response.read(5242880)
            return result, headers, content, cookie
        elif output == 'geturl':
            result = response.geturl()
        elif output == 'headers':
            content = response.headers
            return content
        else:
            if limit == '0':
                result = response.read(224 * 1024)
            elif limit is not None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)

        if close is True:
            response.close()

        return result
    except BaseException:
        return
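# A hedged usage sketch of request()'s output modes (URL is illustrative):
#
#   html = request('http://example.com/')                          # body only
#   code, body = request('http://example.com/', output='response') # status too
#   jar = request('http://example.com/', output='cookie')          # "k=v; k2=v2"
#   final_url = request('http://example.com/', output='geturl')    # after redirects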