def _curl_debug(self, debug_type, debug_msg): debug_types = ('I', '<', '>', '<', '>') if debug_type == 0: debug_msg = native_str(debug_msg) curl_log.debug('%s', debug_msg.strip()) elif debug_type in (1, 2): debug_msg = native_str(debug_msg) for line in debug_msg.splitlines(): curl_log.debug('%s %s', debug_types[debug_type], line) elif debug_type == 4: curl_log.debug('%s %r', debug_types[debug_type], debug_msg)
def add(self, name, value): # type: (str, str) -> None """Adds a new value for the given key.""" norm_name = _normalized_headers[name] self._last_key = norm_name if norm_name in self: self._dict[norm_name] = (native_str(self[norm_name]) + ',' + native_str(value)) self._as_list[norm_name].append(value) else: self[norm_name] = value
def write_headers(self, start_line, headers, chunk=None, callback=None): if self.method == 'HEAD': self._expected_content_remaining = 0 elif 'Content-Length' in headers: self._expected_content_remaining = int(headers['Content-Length']) else: self._expected_content_remaining = None self.start_response('%s %s' % (start_line.code, start_line.reason), [(native_str(k), native_str(v)) for (k, v) in headers.get_all()]) if chunk is not None: self.write(chunk, callback) elif callback is not None: callback() return _dummy_future()
def _parse_header(line): r"""Parse a Content-type like header. Return the main content-type and a dictionary of options. >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st" >>> ct, d = _parse_header(d) >>> ct 'form-data' >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape') True >>> d['foo'] 'b\\a"r' """ parts = _parseparam(';' + line) key = next(parts) # decode_params treats first argument special, but we already stripped key params = [('Dummy', 'value')] for p in parts: i = p.find('=') if i >= 0: name = p[:i].strip().lower() value = p[i + 1:].strip() params.append((name, native_str(value))) params = email.utils.decode_params(params) params.pop(0) # get rid of the dummy again pdict = {} for name, value in params: value = email.utils.collapse_rfc2231_value(value) if len(value) >= 2 and value[0] == '"' and value[-1] == '"': value = value[1:-1] pdict[name] = value return key, pdict
def parse_body_arguments(content_type, body, arguments, files, headers=None): """Parses a form request body. Supports ``application/x-www-form-urlencoded`` and ``multipart/form-data``. The ``content_type`` parameter should be a string and ``body`` should be a byte string. The ``arguments`` and ``files`` parameters are dictionaries that will be updated with the parsed contents. """ if headers and 'Content-Encoding' in headers: gen_log.warning("Unsupported Content-Encoding: %s", headers['Content-Encoding']) return if content_type.startswith("application/x-www-form-urlencoded"): try: uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True) except Exception as e: gen_log.warning('Invalid x-www-form-urlencoded body: %s', e) uri_arguments = {} for name, values in uri_arguments.items(): if values: arguments.setdefault(name, []).extend(values) elif content_type.startswith("multipart/form-data"): try: fields = content_type.split(";") for field in fields: k, sep, v = field.strip().partition("=") if k == "boundary" and v: parse_multipart_form_data(utf8(v), body, arguments, files) break else: raise ValueError("multipart boundary not found") except Exception as e: gen_log.warning("Invalid multipart/form-data: %s", e)
def main(): from tornado_py2.options import define, options, parse_command_line define("print_headers", type=bool, default=False) define("print_body", type=bool, default=True) define("follow_redirects", type=bool, default=True) define("validate_cert", type=bool, default=True) define("proxy_host", type=str) define("proxy_port", type=int) args = parse_command_line() client = HTTPClient() for arg in args: try: response = client.fetch(arg, follow_redirects=options.follow_redirects, validate_cert=options.validate_cert, proxy_host=options.proxy_host, proxy_port=options.proxy_port, ) except HTTPError as e: if e.response is not None: response = e.response else: raise if options.print_headers: print(response.headers) if options.print_body: print(native_str(response.body)) client.close()
def __str__(self): if self.address_family in (socket.AF_INET, socket.AF_INET6): return self.remote_ip elif isinstance(self.address, bytes): # Python 3 with the -bb option warns about str(bytes), # so convert it explicitly. # Unix socket addresses are str on mac but bytes on linux. return native_str(self.address) else: return str(self.address)
def _parse_headers(self, data): # The lstrip removes newlines that some implementations sometimes # insert between messages of a reused connection. Per RFC 7230, # we SHOULD ignore at least one empty line before the request. # http://tools.ietf.org/html/rfc7230#section-3.5 data = native_str(data.decode('latin1')).lstrip("\r\n") # RFC 7230 section allows for both CRLF and bare LF. eol = data.find("\n") start_line = data[:eol].rstrip("\r") headers = httputil.HTTPHeaders.parse(data[eol:]) return start_line, headers
def _oauth_parse_response(body): # I can't find an officially-defined encoding for oauth responses and # have never seen anyone use non-ascii. Leave the response in a byte # string for python 2, and use utf8 on python 3. body = escape.native_str(body) p = urlparse.parse_qs(body, keep_blank_values=False) token = dict(key=p["oauth_token"][0], secret=p["oauth_token_secret"][0]) # Add the extra parameters the Provider included to the token special = ("oauth_token", "oauth_token_secret") token.update((k, p[k][0]) for k in p if k not in special) return token
def parse_response_start_line(line): """Returns a (version, code, reason) tuple for an HTTP 1.x response line. The response is a `collections.namedtuple`. >>> parse_response_start_line("HTTP/1.1 200 OK") ResponseStartLine(version='HTTP/1.1', code=200, reason='OK') """ line = native_str(line) match = re.match("(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)", line) if not match: raise HTTPInputError("Error parsing response start line") return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
def _curl_header_callback(self, headers, header_callback, header_line): header_line = native_str(header_line.decode('latin1')) if header_callback is not None: self.io_loop.add_callback(header_callback, header_line) # header_line as returned by curl includes the end-of-line characters. # whitespace at the start should be preserved to allow multi-line headers header_line = header_line.rstrip() if header_line.startswith("HTTP/"): headers.clear() try: (__, __, reason) = httputil.parse_response_start_line(header_line) header_line = "X-Http-Reason: %s" % reason except httputil.HTTPInputError: return if not header_line: return headers.parse_line(header_line)
def write_headers(self, start_line, headers, chunk=None, callback=None): """Implements `.HTTPConnection.write_headers`.""" lines = [] if self.is_client: self._request_start_line = start_line lines.append( utf8('%s %s HTTP/1.1' % (start_line[0], start_line[1]))) # Client requests with a non-empty body must have either a # Content-Length or a Transfer-Encoding. self._chunking_output = (start_line.method in ('POST', 'PUT', 'PATCH') and 'Content-Length' not in headers and 'Transfer-Encoding' not in headers) else: self._response_start_line = start_line lines.append( utf8('HTTP/1.1 %d %s' % (start_line[1], start_line[2]))) self._chunking_output = ( # TODO: should this use # self._request_start_line.version or # start_line.version? self._request_start_line.version == 'HTTP/1.1' and # 1xx, 204 and 304 responses have no body (not even a zero-length # body), and so should not have either Content-Length or # Transfer-Encoding headers. start_line.code not in (204, 304) and (start_line.code < 100 or start_line.code >= 200) and # No need to chunk the output if a Content-Length is specified. 'Content-Length' not in headers and # Applications are discouraged from touching Transfer-Encoding, # but if they do, leave it alone. 'Transfer-Encoding' not in headers) # If connection to a 1.1 client will be closed, inform client if (self._request_start_line.version == 'HTTP/1.1' and self._disconnect_on_finish): headers['Connection'] = 'close' # If a 1.0 client asked for keep-alive, add the header. if (self._request_start_line.version == 'HTTP/1.0' and self._request_headers.get('Connection', '').lower() == 'keep-alive'): headers['Connection'] = 'Keep-Alive' if self._chunking_output: headers['Transfer-Encoding'] = 'chunked' if (not self.is_client and (self._request_start_line.method == 'HEAD' or start_line.code == 304)): self._expected_content_remaining = 0 elif 'Content-Length' in headers: self._expected_content_remaining = int(headers['Content-Length']) else: self._expected_content_remaining = None # TODO: headers are supposed to be of type str, but we still have some # cases that let bytes slip through. Remove these native_str calls when those # are fixed. header_lines = (native_str(n) + ": " + native_str(v) for n, v in headers.get_all()) if PY3: lines.extend(l.encode('latin1') for l in header_lines) else: lines.extend(header_lines) for line in lines: if b'\n' in line: raise ValueError('Newline in header: ' + repr(line)) future = None if self.stream.closed(): future = self._write_future = Future() future.set_exception(iostream.StreamClosedError()) future.exception() else: if callback is not None: warnings.warn( "callback argument is deprecated, use returned Future instead", DeprecationWarning) self._write_callback = stack_context.wrap(callback) else: future = self._write_future = Future() data = b"\r\n".join(lines) + b"\r\n\r\n" if chunk: data += self._format_chunk(chunk) self._pending_write = self.stream.write(data) future_add_done_callback(self._pending_write, self._on_write_complete) return future
def __init__(self, template_string, name="<string>", loader=None, compress_whitespace=_UNSET, autoescape=_UNSET, whitespace=None): """Construct a Template. :arg str template_string: the contents of the template file. :arg str name: the filename from which the template was loaded (used for error message). :arg tornado.template.BaseLoader loader: the `~tornado.template.BaseLoader` responsible for this template, used to resolve ``{% include %}`` and ``{% extend %}`` directives. :arg bool compress_whitespace: Deprecated since Tornado 4.3. Equivalent to ``whitespace="single"`` if true and ``whitespace="all"`` if false. :arg str autoescape: The name of a function in the template namespace, or ``None`` to disable escaping by default. :arg str whitespace: A string specifying treatment of whitespace; see `filter_whitespace` for options. .. versionchanged:: 4.3 Added ``whitespace`` parameter; deprecated ``compress_whitespace``. """ self.name = escape.native_str(name) if compress_whitespace is not _UNSET: # Convert deprecated compress_whitespace (bool) to whitespace (str). if whitespace is not None: raise Exception( "cannot set both whitespace and compress_whitespace") whitespace = "single" if compress_whitespace else "all" if whitespace is None: if loader and loader.whitespace: whitespace = loader.whitespace else: # Whitespace defaults by filename. if name.endswith(".html") or name.endswith(".js"): whitespace = "single" else: whitespace = "all" # Validate the whitespace setting. filter_whitespace(whitespace, '') if autoescape is not _UNSET: self.autoescape = autoescape elif loader: self.autoescape = loader.autoescape else: self.autoescape = _DEFAULT_AUTOESCAPE self.namespace = loader.namespace if loader else {} reader = _TemplateReader(name, escape.native_str(template_string), whitespace) self.file = _File(self, _parse(reader, self)) self.code = self._generate_python(loader) self.loader = loader try: # Under python2.5, the fake filename used here must match # the module name used in __name__ below. # The dont_inherit flag prevents template.py's future imports # from being applied to the generated code. self.compiled = compile(escape.to_unicode(self.code), "%s.generated.py" % self.name.replace('.', '_'), "exec", dont_inherit=True) except Exception: formatted_code = _format_code(self.code).rstrip() app_log.error("%s code:\n%s", self.name, formatted_code) raise
def parse_config_file(self, path, final=True): """Parses and loads the config file at the given path. The config file contains Python code that will be executed (so it is **not safe** to use untrusted config files). Anything in the global namespace that matches a defined option will be used to set that option's value. Options may either be the specified type for the option or strings (in which case they will be parsed the same way as in `.parse_command_line`) Example (using the options defined in the top-level docs of this module):: port = 80 mysql_host = 'mydb.example.com:3306' # Both lists and comma-separated strings are allowed for # multiple=True. memcache_hosts = ['cache1.example.com:11011', 'cache2.example.com:11011'] memcache_hosts = 'cache1.example.com:11011,cache2.example.com:11011' If ``final`` is ``False``, parse callbacks will not be run. This is useful for applications that wish to combine configurations from multiple sources. .. note:: `tornado.options` is primarily a command-line library. Config file support is provided for applications that wish to use it, but applications that prefer config files may wish to look at other libraries instead. .. versionchanged:: 4.1 Config files are now always interpreted as utf-8 instead of the system default encoding. .. versionchanged:: 4.4 The special variable ``__file__`` is available inside config files, specifying the absolute path to the config file itself. .. versionchanged:: 5.1 Added the ability to set options via strings in config files. """ config = {'__file__': os.path.abspath(path)} with open(path, 'rb') as f: exec_in(native_str(f.read()), config, config) for name in config: normalized = self._normalize_name(name) if normalized in self._options: option = self._options[normalized] if option.multiple: if not isinstance(config[name], (list, str)): raise Error("Option %r is required to be a list of %s " "or a comma-separated string" % (option.name, option.type.__name__)) if type(config[name]) == str and option.type != str: option.parse(config[name]) else: option.set(config[name]) if final: self.run_parse_callbacks()
def _curl_setup_request(self, curl, request, buffer, headers): curl.setopt(pycurl.URL, native_str(request.url)) # libcurl's magic "Expect: 100-continue" behavior causes delays # with servers that don't support it (which include, among others, # Google's OpenID endpoint). Additionally, this behavior has # a bug in conjunction with the curl_multi_socket_action API # (https://sourceforge.net/tracker/?func=detail&atid=100976&aid=3039744&group_id=976), # which increases the delays. It's more trouble than it's worth, # so just turn off the feature (yes, setting Expect: to an empty # value is the official way to disable this) if "Expect" not in request.headers: request.headers["Expect"] = "" # libcurl adds Pragma: no-cache by default; disable that too if "Pragma" not in request.headers: request.headers["Pragma"] = "" curl.setopt(pycurl.HTTPHEADER, [ "%s: %s" % (native_str(k), native_str(v)) for k, v in request.headers.get_all() ]) curl.setopt( pycurl.HEADERFUNCTION, functools.partial(self._curl_header_callback, headers, request.header_callback)) if request.streaming_callback: def write_function(chunk): self.io_loop.add_callback(request.streaming_callback, chunk) else: write_function = buffer.write curl.setopt(pycurl.WRITEFUNCTION, write_function) curl.setopt(pycurl.FOLLOWLOCATION, request.follow_redirects) curl.setopt(pycurl.MAXREDIRS, request.max_redirects) curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(1000 * request.connect_timeout)) curl.setopt(pycurl.TIMEOUT_MS, int(1000 * request.request_timeout)) if request.user_agent: curl.setopt(pycurl.USERAGENT, native_str(request.user_agent)) else: curl.setopt(pycurl.USERAGENT, "Mozilla/5.0 (compatible; pycurl)") if request.network_interface: curl.setopt(pycurl.INTERFACE, request.network_interface) if request.decompress_response: curl.setopt(pycurl.ENCODING, "gzip,deflate") else: curl.setopt(pycurl.ENCODING, "none") if request.proxy_host and request.proxy_port: curl.setopt(pycurl.PROXY, request.proxy_host) curl.setopt(pycurl.PROXYPORT, request.proxy_port) if request.proxy_username: credentials = httputil.encode_username_password( request.proxy_username, request.proxy_password) curl.setopt(pycurl.PROXYUSERPWD, credentials) if (request.proxy_auth_mode is None or request.proxy_auth_mode == "basic"): curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_BASIC) elif request.proxy_auth_mode == "digest": curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_DIGEST) else: raise ValueError("Unsupported proxy_auth_mode %s" % request.proxy_auth_mode) else: curl.setopt(pycurl.PROXY, '') curl.unsetopt(pycurl.PROXYUSERPWD) if request.validate_cert: curl.setopt(pycurl.SSL_VERIFYPEER, 1) curl.setopt(pycurl.SSL_VERIFYHOST, 2) else: curl.setopt(pycurl.SSL_VERIFYPEER, 0) curl.setopt(pycurl.SSL_VERIFYHOST, 0) if request.ca_certs is not None: curl.setopt(pycurl.CAINFO, request.ca_certs) else: # There is no way to restore pycurl.CAINFO to its default value # (Using unsetopt makes it reject all certificates). # I don't see any way to read the default value from python so it # can be restored later. We'll have to just leave CAINFO untouched # if no ca_certs file was specified, and require that if any # request uses a custom ca_certs file, they all must. pass if request.allow_ipv6 is False: # Curl behaves reasonably when DNS resolution gives an ipv6 address # that we can't reach, so allow ipv6 unless the user asks to disable. curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) else: curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) # Set the request method through curl's irritating interface which makes # up names for almost every single method curl_options = { "GET": pycurl.HTTPGET, "POST": pycurl.POST, "PUT": pycurl.UPLOAD, "HEAD": pycurl.NOBODY, } custom_methods = set(["DELETE", "OPTIONS", "PATCH"]) for o in curl_options.values(): curl.setopt(o, False) if request.method in curl_options: curl.unsetopt(pycurl.CUSTOMREQUEST) curl.setopt(curl_options[request.method], True) elif request.allow_nonstandard_methods or request.method in custom_methods: curl.setopt(pycurl.CUSTOMREQUEST, request.method) else: raise KeyError('unknown method ' + request.method) body_expected = request.method in ("POST", "PATCH", "PUT") body_present = request.body is not None if not request.allow_nonstandard_methods: # Some HTTP methods nearly always have bodies while others # almost never do. Fail in this case unless the user has # opted out of sanity checks with allow_nonstandard_methods. if ((body_expected and not body_present) or (body_present and not body_expected)): raise ValueError( 'Body must %sbe None for method %s (unless ' 'allow_nonstandard_methods is true)' % ('not ' if body_expected else '', request.method)) if body_expected or body_present: if request.method == "GET": # Even with `allow_nonstandard_methods` we disallow # GET with a body (because libcurl doesn't allow it # unless we use CUSTOMREQUEST). While the spec doesn't # forbid clients from sending a body, it arguably # disallows the server from doing anything with them. raise ValueError('Body must be None for GET request') request_buffer = BytesIO(utf8(request.body or '')) def ioctl(cmd): if cmd == curl.IOCMD_RESTARTREAD: request_buffer.seek(0) curl.setopt(pycurl.READFUNCTION, request_buffer.read) curl.setopt(pycurl.IOCTLFUNCTION, ioctl) if request.method == "POST": curl.setopt(pycurl.POSTFIELDSIZE, len(request.body or '')) else: curl.setopt(pycurl.UPLOAD, True) curl.setopt(pycurl.INFILESIZE, len(request.body or '')) if request.auth_username is not None: if request.auth_mode is None or request.auth_mode == "basic": curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC) elif request.auth_mode == "digest": curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_DIGEST) else: raise ValueError("Unsupported auth_mode %s" % request.auth_mode) userpwd = httputil.encode_username_password( request.auth_username, request.auth_password) curl.setopt(pycurl.USERPWD, userpwd) curl_log.debug("%s %s (username: %r)", request.method, request.url, request.auth_username) else: curl.unsetopt(pycurl.USERPWD) curl_log.debug("%s %s", request.method, request.url) if request.client_cert is not None: curl.setopt(pycurl.SSLCERT, request.client_cert) if request.client_key is not None: curl.setopt(pycurl.SSLKEY, request.client_key) if request.ssl_options is not None: raise ValueError("ssl_options not supported in curl_httpclient") if threading.activeCount() > 1: # libcurl/pycurl is not thread-safe by default. When multiple threads # are used, signals should be disabled. This has the side effect # of disabling DNS timeouts in some environments (when libcurl is # not linked against ares), so we don't do it when there is only one # thread. Applications that use many short-lived threads may need # to set NOSIGNAL manually in a prepare_curl_callback since # there may not be any other threads running at the time we call # threading.activeCount. curl.setopt(pycurl.NOSIGNAL, 1) if request.prepare_curl_callback is not None: request.prepare_curl_callback(curl)