def test_escape_decode(self): import _codecs test = _codecs.escape_encode(b'a\n\\b\x00c\td\u2045')[0] assert _codecs.escape_decode(test)[0] == b'a\n\\b\x00c\td\u2045' assert _codecs.escape_decode(b'\\077')[0] == b'?' assert _codecs.escape_decode(b'\\100')[0] == b'@' assert _codecs.escape_decode(b'\\253')[0] == bytes([0o253]) assert _codecs.escape_decode(b'\\312')[0] == bytes([0o312])
def test_escape_decode():
    """Sanity checks for codecs.escape_decode, including the 'replace' error handler."""
    # (input, expected value, expected consumed length) round trips.
    for raw, expected_value, expected_length in (
            ("ab\a\b\t\n\r\f\vba", 'ab\x07\x08\t\n\r\x0c\x0bba', 11),
            ("\\a", '\x07', 2),
            ("ab\a\b\t\n\r\f\vbaab\\a\\b\\t\\n\\r\\f\\vbaab\\\a\\\b\\\t\\\n\\\r\\\f\\\vba",
             'ab\x07\x08\t\n\r\x0c\x0bbaab\x07\x08\t\n\r\x0c\x0bbaab\\\x07\\\x08\\\t\\\r\\\x0c\\\x0bba',
             47),
            ("\\\a", '\\\x07', 2),
    ):
        value, length = codecs.escape_decode(raw)
        AreEqual(value, expected_value)
        AreEqual(length, expected_length)
    # errors=None behaves like the default for well-formed input.
    AreEqual("abc", codecs.escape_decode("abc", None)[0])
    # Truncated/malformed \x escapes under the 'replace' error handler.
    for expected, malformed in (
            ("?\\", "\\x"),
            ("?\\x", "\\x2"),
            ("?\\x", "\\xI"),
            ("?\\xI", "\\xII"),
            ("?\\x1", "\\x1I"),
            ("?\\xI", "\\xI1"),
    ):
        AreEqual(expected, codecs.escape_decode(malformed, 'replace')[0])
def test_escape_decode_ignore_invalid(self): import _codecs assert _codecs.escape_decode(b'\\9')[0] == b'\\9' assert _codecs.escape_decode(b'\\01')[0] == b'\x01' assert _codecs.escape_decode(b'\\0f')[0] == b'\0' + b'f' assert _codecs.escape_decode(b'\\08')[0] == b'\0' + b'8'
def test_escape_decode_wrap_around(self): import _codecs assert _codecs.escape_decode(b'\\400')[0] == b'\0'
def curlparse(string, encoding="utf-8", remain_unknown_args=False):
    """Translate curl-string into dict of request. Do not support file upload which contains @file_path.

    :param string: standard curl-string, like `r'''curl ...'''`.
    :param encoding: encoding for post-data encoding.
    :param remain_unknown_args: if True, keep unrecognized CLI tokens under
        the ``'unknown_args'`` key of the result.
    :return: dict of keyword arguments suitable for ``requests.request(**result)``.

    Copy from torequests.

    Basic Usage::

      >>> from torequests.utils import curlparse
      >>> curl_string = '''curl 'https://p.3.cn?skuIds=1&nonsense=1&nonce=0' -H 'Pragma: no-cache' -H 'DNT: 1' -H 'Accept-Encoding: gzip, deflate' -H 'Accept-Language: zh-CN,zh;q=0.9' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' -H 'Cache-Control: no-cache' -H 'Referer: https://p.3.cn?skuIds=1&nonsense=1&nonce=0' -H 'Cookie: ASPSESSIONIDSQRRSADB=MLHDPOPCAMBDGPFGBEEJKLAF' -H 'Connection: keep-alive' --compressed'''
      >>> request_args = curlparse(curl_string)
      >>> request_args
      {'url': 'https://p.3.cn?skuIds=1&nonsense=1&nonce=0', 'headers': {'Pragma': 'no-cache', 'Dnt': '1', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Cache-Control': 'no-cache', 'Referer': 'https://p.3.cn?skuIds=1&nonsense=1&nonce=0', 'Cookie': 'ASPSESSIONIDSQRRSADB=MLHDPOPCAMBDGPFGBEEJKLAF', 'Connection': 'keep-alive'}, 'method': 'get'}
      >>> import requests
      >>> requests.request(**request_args)
      <Response [200]>
    """

    def unescape_sig(s):
        # Recover the original text of a $'...' argument that was base64-wrapped
        # (and tagged with escape_sig) before shlex splitting.
        if s.startswith(escape_sig):
            return decode_as_base64(s[len(escape_sig):], encoding=encoding)
        return s

    # Marker prepended to base64-encoded ANSI-C quoted arguments so they can be
    # recognized and restored after shlex splitting.
    escape_sig = u'fac4833e034b6771e5a1c74037e9153e'
    if string.startswith("http"):
        # A bare URL instead of a curl command: default to a GET request.
        return {"url": string, "method": "get"}
    # escape $'' ANSI-C strings: decode their backslash escapes, then base64-wrap
    # them so shlex_split cannot mangle the decoded control characters.
    for arg in re_findall(r"\$'[\s\S]*(?<!\\)'", string):
        _escaped = escape_decode(bytes(arg[2:-1], encoding))[0].decode(encoding)
        string = string.replace(
            arg, "'{}{}'".format(
                escape_sig, encode_as_base64(_escaped, encoding=encoding)))
    lex_list = shlex_split(string.strip())
    args, unknown = _Curl.parser.parse_known_args(lex_list)
    requests_args = {}
    headers = {}
    requests_args["url"] = unescape_sig(args.url)
    if not requests_args["url"]:
        # Positional URL was not captured by the parser; scan the leftover
        # tokens for the first thing that looks like an http(s) URL.
        for arg in unknown:
            if re_match(r'https?://', arg):
                requests_args["url"] = arg
                break
    for header in args.header:
        key, value = unescape_sig(header).split(":", 1)
        headers[key.title()] = value.strip()
    if args.user_agent:
        headers["User-Agent"] = unescape_sig(args.user_agent)
    if args.referer:
        # Fix: run through unescape_sig like every other header-bearing value,
        # so a $'...'-quoted referer is restored instead of staying base64-tagged.
        headers["Referer"] = unescape_sig(args.referer)
    if headers:
        requests_args["headers"] = headers
    if args.user:
        # "user:pass" -> [user, pass]; pad with "" when no password was given.
        requests_args["auth"] = (unescape_sig(args.user).split(":", 1) + [""])[:2]
    data = args.data or args.data_binary or args.form
    if args.data_urlencode:
        data = quote_plus(args.data_urlencode)
    if data:
        # NOTE(review): a request body forces the method to POST, overriding any
        # explicit -X/--request verb — confirm this is intended before changing.
        args.request = "post"
        requests_args["data"] = unescape_sig(data).encode(encoding)
    requests_args["method"] = args.request.lower()
    if args.head:
        requests_args['method'] = 'head'
    # Map curl's --connect-timeout / --max-time onto requests' timeout shapes.
    if args.connect_timeout and args.max_time:
        requests_args["timeout"] = (args.connect_timeout, args.max_time)
    elif args.connect_timeout:
        requests_args["timeout"] = args.connect_timeout
    elif args.max_time:
        requests_args["timeout"] = args.max_time
    if remain_unknown_args:
        requests_args['unknown_args'] = unknown
    if args.location:
        requests_args['allow_redirects'] = True
    if args.retry_max_time:
        requests_args['retry'] = args.retry_max_time
    return requests_args