def _verify_cert(self, peercert):
    """Returns True if peercert is valid according to the configured
    validation mode and hostname.

    The ssl handshake already tested the certificate for a valid
    CA signature; the only thing that remains is to check
    the hostname.

    :arg peercert: the peer certificate to check, in the form returned
        by ``SSLSocket.getpeercert()``.  May be ``None`` or empty when
        the peer did not present a certificate.
    :returns: ``True`` when verification is disabled (``CERT_NONE``),
        when no ``_server_hostname`` is configured, or when the
        certificate matches the hostname.  ``False`` (after logging a
        warning) when the certificate is missing or does not match.
    :raises TypeError: if ``self._ssl_options`` is neither a ``dict``
        nor an ``ssl.SSLContext``.
    """
    if isinstance(self._ssl_options, dict):
        verify_mode = self._ssl_options.get('cert_reqs', ssl.CERT_NONE)
    elif isinstance(self._ssl_options, ssl.SSLContext):
        verify_mode = self._ssl_options.verify_mode
    else:
        # Previously this fell through and failed below with an
        # UnboundLocalError on ``verify_mode``; fail with a clear message.
        raise TypeError("ssl_options must be a dict or ssl.SSLContext, "
                        "not %r" % (type(self._ssl_options),))
    assert verify_mode in (ssl.CERT_NONE, ssl.CERT_REQUIRED, ssl.CERT_OPTIONAL)
    if verify_mode == ssl.CERT_NONE or self._server_hostname is None:
        return True
    # Check the certificate we were handed instead of fetching a second
    # copy from the socket, so the object tested here is the same one
    # that is matched against the hostname below.  A missing certificate
    # can never match a hostname, so fail closed in every verifying mode
    # rather than handing an empty certificate to the matcher.
    if not peercert:
        gen_log.warning("No SSL certificate given")
        return False
    try:
        ssl_match_hostname(peercert, self._server_hostname)
    except SSLCertificateError:
        gen_log.warning("Invalid SSL certificate", exc_info=True)
        return False
    else:
        return True
def parse_body_arguments(content_type, body, arguments, files):
    """Parse a form-encoded request body into ``arguments`` and ``files``.

    Two encodings are understood: ``application/x-www-form-urlencoded``
    and ``multipart/form-data``.  ``content_type`` is expected to be a
    string and ``body`` a byte string.  Both ``arguments`` and ``files``
    are dictionaries that are updated in place; any other content type
    leaves them untouched.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        try:
            parsed = parse_qs_bytes(native_str(body), keep_blank_values=True)
        except Exception as exc:
            gen_log.warning('Invalid x-www-form-urlencoded body: %s', exc)
            parsed = {}
        for key, value_list in parsed.items():
            if not value_list:
                continue
            arguments.setdefault(key, []).extend(value_list)
        return

    if not content_type.startswith("multipart/form-data"):
        return

    # Look through the header parameters for a non-empty boundary and
    # hand the body to the multipart parser as soon as one is found.
    for param in content_type.split(";"):
        key, _, value = param.strip().partition("=")
        if key != "boundary" or not value:
            continue
        parse_multipart_form_data(utf8(value), body, arguments, files)
        return
    gen_log.warning("Invalid multipart/form-data")
def _handle_read(self):
    """Pull everything currently readable into the buffer, then dispatch.

    Any exception raised while reading is logged and closes the stream.
    Otherwise the buffered data is offered to ``_read_from_buffer``; if
    that does not consume it, the close callback is given a chance to
    run.
    """
    try:
        try:
            # Pretend to have a pending callback so that an EOF in
            # _read_to_buffer doesn't trigger an immediate close
            # callback.  At the end of this method we'll either
            # establish a real pending callback via
            # _read_from_buffer or run the close callback.
            #
            # Two try statements are needed so that pending_callbacks
            # is decremented before the `except` clause below (which
            # calls `close` and does need to trigger the callback).
            self._pending_callbacks += 1
            while not self.closed():
                # Read from the socket until we get EWOULDBLOCK or
                # equivalent.  SSL sockets do some internal buffering,
                # and if the data is sitting in the SSL object's buffer
                # select() and friends can't see it; the only way to
                # find out if it's there is to try to read it.
                bytes_read = self._read_to_buffer()
                if bytes_read == 0:
                    break
        finally:
            self._pending_callbacks -= 1
    except Exception:
        gen_log.warning("error on read", exc_info=True)
        self.close(exc_info=True)
        return
    if not self._read_from_buffer():
        self._maybe_run_close_callback()
def log_stack(self, signal, frame):
    """Signal handler that logs the stack trace of the current thread.

    Intended to be used together with `set_blocking_signal_threshold`.
    The warning reports ``self._blocking_signal_threshold`` followed by
    the formatted stack of ``frame``.
    """
    threshold = self._blocking_signal_threshold
    stack_text = ''.join(traceback.format_stack(frame))
    gen_log.warning('IOLoop blocked for %f seconds in\n%s',
                    threshold, stack_text)
def start(io_loop=None, check_time=500):
    """Begin watching source files for changes using the given `.IOLoop`.

    When ``io_loop`` is not supplied the current IOLoop is used.  Calling
    this again for a loop that is already registered is a no-op.
    ``check_time`` is passed through to the periodic callback that runs
    the update check.
    """
    if not io_loop:
        io_loop = ioloop.IOLoop.current()
    if io_loop in _io_loops:
        return
    _io_loops[io_loop] = True
    if len(_io_loops) > 1:
        gen_log.warning("webalchemy.tornado.autoreload started more than once in the same process")
    # Close the loop (and all of its file descriptors) before a reload.
    close_loop = functools.partial(io_loop.close, all_fds=True)
    add_reload_hook(close_loop)
    modify_times = {}
    check_for_updates = functools.partial(_reload_on_update, modify_times)
    ioloop.PeriodicCallback(
        check_for_updates, check_time, io_loop=io_loop).start()
def _handle_connect(self):
    """Finish a pending non-blocking connect on ``self.socket``.

    Reads ``SO_ERROR`` from the socket.  On failure the error is stored
    in ``self.error``, a warning is logged and the stream is closed.
    On success the connect callback (if any) is run exactly once and
    ``self._connecting`` is cleared.
    """
    err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
    if err != 0:
        self.error = socket.error(err, os.strerror(err))
        # IOLoop implementations may vary: some of them return
        # an error state before the socket becomes writable, so
        # in that case a connection failure would be handled by the
        # error path in _handle_events instead of here.
        #
        # Not every errno value has a symbolic name; fall back to the
        # raw number so that logging cannot raise KeyError while we are
        # already handling a failure.
        gen_log.warning("Connect error on fd %d: %s",
                        self.socket.fileno(),
                        errno.errorcode.get(err, err))
        self.close()
        return
    if self._connect_callback is not None:
        callback = self._connect_callback
        # Clear the attribute before running so the callback fires once.
        self._connect_callback = None
        self._run_callback(callback)
    self._connecting = False
def _do_ssl_handshake(self):
    """Advance the non-blocking SSL handshake on ``self.socket``.

    * If the handshake needs more I/O, ``_handshake_reading`` or
      ``_handshake_writing`` is set and the method returns so it can
      be retried later.
    * On EOF, an SSL protocol error, any socket error, or an
      ``AttributeError`` from ``do_handshake``, the stream is closed.
    * Any other ``ssl.SSLError`` is re-raised.
    * On success ``_ssl_accepting`` is cleared, the peer certificate
      is checked with ``_verify_cert`` (closing the stream on failure)
      and the SSL connect callback, if any, is run once.
    """
    # Based on code from test_ssl.py in the python stdlib
    try:
        self._handshake_reading = False
        self._handshake_writing = False
        self.socket.do_handshake()
    except ssl.SSLError as err:
        if err.args[0] == ssl.SSL_ERROR_WANT_READ:
            self._handshake_reading = True
            return
        elif err.args[0] == ssl.SSL_ERROR_WANT_WRITE:
            self._handshake_writing = True
            return
        elif err.args[0] in (ssl.SSL_ERROR_EOF,
                             ssl.SSL_ERROR_ZERO_RETURN):
            return self.close(exc_info=True)
        elif err.args[0] == ssl.SSL_ERROR_SSL:
            try:
                peer = self.socket.getpeername()
            except Exception:
                peer = '(not connected)'
            gen_log.warning("SSL Error on %d %s: %s",
                            self.socket.fileno(), peer, err)
            return self.close(exc_info=True)
        raise
    except socket.error as err:
        # Connection resets are routine and closed quietly.  Any other
        # socket error used to be swallowed here, which left the stream
        # open with the handshake unfinished; log it and close instead,
        # the same way write errors are handled in _handle_write.
        if err.args[0] not in _ERRNO_CONNRESET:
            gen_log.warning("SSL handshake socket error on %d: %s",
                            self.socket.fileno(), err)
        return self.close(exc_info=True)
    except AttributeError:
        # On Linux, if the connection was reset before the call to
        # wrap_socket, do_handshake will fail with an
        # AttributeError.
        return self.close(exc_info=True)
    else:
        self._ssl_accepting = False
        if not self._verify_cert(self.socket.getpeercert()):
            self.close()
            return
        if self._ssl_connect_callback is not None:
            callback = self._ssl_connect_callback
            # Clear the attribute before running so it fires only once.
            self._ssl_connect_callback = None
            self._run_callback(callback)
def _handle_write(self):
    """Flush as much of the write buffer as the fd will accept.

    Chunks are written until the buffer is empty, the fd would block,
    or a write error closes the stream.  Once the buffer has been fully
    drained the pending write callback, if any, is run once.
    """
    while self._write_buffer:
        try:
            if not self._write_buffer_frozen:
                # On windows, socket.send blows up if given a
                # write buffer that's too large, instead of just
                # returning the number of bytes it was able to
                # process.  Therefore we must not call socket.send
                # with more than 128KB at a time.
                _merge_prefix(self._write_buffer, 128 * 1024)
            sent = self.write_to_fd(self._write_buffer[0])
            if sent == 0:
                # With OpenSSL, if we couldn't write the entire buffer,
                # the very same string object must be used on the
                # next call to send.  Therefore we suppress
                # merging the write buffer after an incomplete send.
                # A cleaner solution would be to set
                # SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER, but this is
                # not yet accessible from python
                # (http://bugs.python.org/issue8240)
                self._write_buffer_frozen = True
                break
            self._write_buffer_frozen = False
            _merge_prefix(self._write_buffer, sent)
            self._write_buffer.popleft()
        except (socket.error, IOError, OSError) as err:
            if err.args[0] in _ERRNO_WOULDBLOCK:
                self._write_buffer_frozen = True
                break
            if err.args[0] not in _ERRNO_CONNRESET:
                # Broken pipe errors are usually caused by connection
                # reset, and it's better to not log EPIPE errors to
                # minimize log spam
                gen_log.warning("Write error on %d: %s",
                                self.fileno(), err)
            self.close(exc_info=True)
            return
    if self._write_buffer or not self._write_callback:
        return
    finished = self._write_callback
    self._write_callback = None
    self._run_callback(finished)
def initialize(self, io_loop, max_clients=10, defaults=None):
    """Set up the curl multi handle and the per-client bookkeeping.

    Creates ``max_clients`` curl handles (all initially free), an empty
    request queue and fd map, selects the socket-action entry point, and
    starts a periodic callback that forces a timeout scan every 1000 ms.
    """
    super(CurlAsyncHTTPClient, self).initialize(io_loop, defaults=defaults)
    self._multi = pycurl.CurlMulti()
    self._multi.setopt(pycurl.M_TIMERFUNCTION, self._set_timeout)
    self._multi.setopt(pycurl.M_SOCKETFUNCTION, self._handle_socket)
    self._curls = [_curl_create() for _ in range(max_clients)]
    self._free_list = list(self._curls)
    self._requests = collections.deque()
    self._fds = {}
    self._timeout = None

    try:
        self._socket_action = self._multi.socket_action
    except AttributeError:
        # socket_action is found in pycurl since 7.18.2 (it's been
        # in libcurl longer than that but wasn't accessible to
        # python).
        gen_log.warning("socket_action method missing from pycurl; "
                        "falling back to socket_all. Upgrading "
                        "libcurl and pycurl will improve performance")

        def _socket_all_fallback(fd, action):
            return self._multi.socket_all()

        self._socket_action = _socket_all_fallback

    # libcurl has bugs that sometimes cause it to not report all
    # relevant file descriptors and timeouts to TIMERFUNCTION/
    # SOCKETFUNCTION.  Mitigate the effects of such bugs by
    # forcing a periodic scan of all active requests.
    periodic = ioloop.PeriodicCallback(
        self._handle_force_timeout, 1000, io_loop=io_loop)
    self._force_timeout_callback = periodic
    self._force_timeout_callback.start()

    # Work around a bug in libcurl 7.29.0: Some fields in the curl
    # multi object are initialized lazily, and its destructor will
    # segfault if it is destroyed without having been used.  Add
    # and remove a dummy handle to make sure everything is
    # initialized.
    throwaway = pycurl.Curl()
    self._multi.add_handle(throwaway)
    self._multi.remove_handle(throwaway)
def connect(self, address, callback=None, server_hostname=None):
    """Start a non-blocking connection of the socket to ``address``.

    This may only be used when the socket handed to the constructor
    was not already connected.  ``address`` has the same format as for
    `socket.connect <socket.socket.connect>`, i.e. a ``(host, port)``
    tuple.  When ``callback`` is given it is invoked once the
    connection has completed.

    ``server_hostname``, if given, is used by SSL connections for
    certificate validation (when requested in the ``ssl_options``) and
    for SNI (if supported; requires Python 3.2+).

    It is safe to call `IOStream.write <BaseIOStream.write>` while the
    connection is still pending; the data is written as soon as the
    connection is ready.  Calling `IOStream` read methods before the
    socket is connected works on some platforms but is non-portable.
    """
    self._connecting = True
    try:
        self.socket.connect(address)
    except socket.error as exc:
        # In non-blocking mode we expect connect() to raise an
        # exception with EINPROGRESS or EWOULDBLOCK.
        #
        # On freebsd, other errors such as ECONNREFUSED may be
        # returned immediately when attempting to connect to
        # localhost, so handle them the same way as an error
        # reported later in _handle_connect.
        code = exc.args[0]
        still_pending = code == errno.EINPROGRESS or code in _ERRNO_WOULDBLOCK
        if not still_pending:
            gen_log.warning("Connect error on fd %d: %s",
                            self.socket.fileno(), exc)
            self.close(exc_info=True)
            return
    self._connect_callback = stack_context.wrap(callback)
    self._add_io_state(self.io_loop.WRITE)
def _handle_events(self, fd, events):
    """Dispatch IOLoop ``events`` reported for this stream's ``fd``.

    Runs the read handler, then the connect/write handlers, returning
    early whenever one of them leaves the stream closed.  An ERROR
    event records the fd error and schedules ``close`` on the IOLoop.
    Otherwise the set of events the stream is interested in is
    recomputed and, if it changed, the IOLoop handler is updated.

    Any exception raised along the way is logged, closes the stream
    and is then re-raised.
    """
    if self.closed():
        gen_log.warning("Got events for closed stream %d", fd)
        return
    try:
        if events & self.io_loop.READ:
            self._handle_read()
        if self.closed():
            return
        if events & self.io_loop.WRITE:
            # A pending connect is finished first; the write handler
            # then runs regardless.
            if self._connecting:
                self._handle_connect()
            self._handle_write()
        if self.closed():
            return
        if events & self.io_loop.ERROR:
            self.error = self.get_fd_error()
            # We may have queued up a user callback in _handle_read or
            # _handle_write, so don't close the IOStream until those
            # callbacks have had a chance to run.
            self.io_loop.add_callback(self.close)
            return
        # Rebuild the interest mask: always ERROR, plus READ/WRITE
        # depending on what the stream is currently doing.
        state = self.io_loop.ERROR
        if self.reading():
            state |= self.io_loop.READ
        if self.writing():
            state |= self.io_loop.WRITE
        if state == self.io_loop.ERROR:
            # Neither reading nor writing: READ is added to the mask
            # anyway.
            state |= self.io_loop.READ
        if state != self._state:
            assert self._state is not None, \
                "shouldn't happen: _handle_events without self._state"
            self._state = state
            self.io_loop.update_handler(self.fileno(), self._state)
    except Exception:
        gen_log.error("Uncaught exception, closing connection.", exc_info=True)
        self.close(exc_info=True)
        raise
def parse_multipart_form_data(boundary, data, arguments, files):
    """Parse a ``multipart/form-data`` body.

    ``boundary`` and ``data`` are both byte strings.  Plain fields are
    appended to ``arguments`` and file uploads to ``files``; both
    dictionaries are updated in place.  Malformed parts are skipped
    with a warning.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    delimiter = b"--" + boundary
    end_index = data.rfind(delimiter + b"--")
    if end_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return
    for chunk in data[:end_index].split(delimiter + b"\r\n"):
        if not chunk:
            continue
        header_end = chunk.find(b"\r\n\r\n")
        if header_end == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(chunk[:header_end].decode("utf-8"))
        disposition, params = _parse_header(
            headers.get("Content-Disposition", ""))
        if disposition != "form-data" or not chunk.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        # Skip the blank line after the headers and drop the trailing CRLF.
        payload = chunk[header_end + 4:-2]
        if not params.get("name"):
            gen_log.warning("multipart/form-data value missing name")
            continue
        field_name = params["name"]
        if not params.get("filename"):
            arguments.setdefault(field_name, []).append(payload)
            continue
        content_type = headers.get("Content-Type", "application/unknown")
        upload = HTTPFile(filename=params["filename"], body=payload,
                          content_type=content_type)
        files.setdefault(field_name, []).append(upload)
def fork_processes(num_processes, max_restarts=100):
    """Start multiple worker processes.

    When ``num_processes`` is None or <= 0, the number of cores on this
    machine is detected and that many child processes are forked.  A
    positive ``num_processes`` forks exactly that many sub-processes.

    Because processes rather than threads are used, no memory is shared
    between any server code.

    Multiple processes are not compatible with the autoreload module
    (or the ``autoreload=True`` option to
    `webalchemy.tornado.web.Application`, which defaults to True when
    ``debug=True``).  When using multiple processes, no IOLoops can be
    created or referenced until after the call to ``fork_processes``.

    In each child process, ``fork_processes`` returns its *task id*, a
    number in ``range(num_processes)``.  Processes that exit abnormally
    (due to a signal or non-zero exit status) are restarted with the
    same id (up to ``max_restarts`` times).  In the parent process,
    ``fork_processes`` exits the process via ``sys.exit(0)`` once all
    children have exited normally, and otherwise only leaves by
    throwing an exception.
    """
    global _task_id
    assert _task_id is None
    if num_processes is None or num_processes <= 0:
        num_processes = cpu_count()
    if ioloop.IOLoop.initialized():
        raise RuntimeError("Cannot run in multiple processes: IOLoop instance "
                           "has already been initialized. You cannot call "
                           "IOLoop.instance() before calling start_processes()")
    gen_log.info("Starting %d processes", num_processes)
    children = {}

    def start_child(i):
        global _task_id
        pid = os.fork()
        if pid != 0:
            # parent process: remember which task id this pid carries
            children[pid] = i
            return None
        # child process
        _reseed_random()
        _task_id = i
        return i

    for index in range(num_processes):
        task_id = start_child(index)
        if task_id is not None:
            return task_id

    num_restarts = 0
    while children:
        try:
            pid, status = os.wait()
        except OSError as exc:
            if exc.errno == errno.EINTR:
                continue
            raise
        if pid not in children:
            continue
        task_id = children.pop(pid)
        if os.WIFSIGNALED(status):
            gen_log.warning("child %d (pid %d) killed by signal %d, restarting",
                            task_id, pid, os.WTERMSIG(status))
        elif os.WEXITSTATUS(status) != 0:
            gen_log.warning("child %d (pid %d) exited with status %d, restarting",
                            task_id, pid, os.WEXITSTATUS(status))
        else:
            gen_log.info("child %d (pid %d) exited normally", task_id, pid)
            continue
        num_restarts += 1
        if num_restarts > max_restarts:
            raise RuntimeError("Too many child restarts, giving up")
        restarted_id = start_child(task_id)
        if restarted_id is not None:
            return restarted_id
    # All child processes exited cleanly, so exit the master process
    # instead of just returning to right after the call to
    # fork_processes (which will probably just start up another IOLoop
    # unless the caller checks the return value).
    sys.exit(0)
def main():
    """Command-line wrapper to re-run a script whenever its source changes.

    Scripts may be specified by filename or module name::

        python -m webalchemy.tornado.autoreload -m webalchemy.tornado.test.runtests
        python -m webalchemy.tornado.autoreload webalchemy/tornado/test/runtests.py

    Running a script with this wrapper is similar to calling
    `webalchemy.tornado.autoreload.wait` at the end of the script, but this
    wrapper can catch import-time problems like syntax errors that would
    otherwise prevent the script from reaching its call to `wait`.

    With no script or module argument the usage text is printed to
    stderr and the process exits with status 1.
    """
    # Work on a copy of argv so the original can be restored once the
    # target has finished running.
    original_argv = sys.argv
    sys.argv = sys.argv[:]
    if len(sys.argv) >= 3 and sys.argv[1] == "-m":
        mode = "module"
        module = sys.argv[2]
        del sys.argv[1:3]
    elif len(sys.argv) >= 2:
        mode = "script"
        script = sys.argv[1]
        sys.argv = sys.argv[1:]
    else:
        print(_USAGE, file=sys.stderr)
        sys.exit(1)

    try:
        if mode == "module":
            import runpy
            runpy.run_module(module, run_name="__main__", alter_sys=True)
        elif mode == "script":
            with open(script) as f:
                global __file__
                __file__ = script
                # Use globals as our "locals" dictionary so that
                # something that tries to import __main__ (e.g. the unittest
                # module) will see the right things.
                exec_in(f.read(), globals(), globals())
    except SystemExit as e:
        logging.basicConfig()
        gen_log.info("Script exited with status %s", e.code)
    except Exception as e:
        logging.basicConfig()
        gen_log.warning("Script exited with uncaught exception", exc_info=True)
        # If an exception occurred at import time, the file with the error
        # never made it into sys.modules and so we won't know to watch it.
        # Just to make sure we've covered everything, walk the stack trace
        # from the exception and watch every file.
        for (filename, lineno, name, line) in traceback.extract_tb(sys.exc_info()[2]):
            watch(filename)
        if isinstance(e, SyntaxError):
            # SyntaxErrors are special: their innermost stack frame is fake
            # so extract_tb won't see it and we have to get the filename
            # from the exception object.
            watch(e.filename)
    else:
        logging.basicConfig()
        gen_log.info("Script exited normally")
    # restore sys.argv so subsequent executions will include autoreload
    sys.argv = original_argv

    if mode == 'module':
        # runpy did a fake import of the module as __main__, but now it's
        # no longer in sys.modules.  Figure out where it is and watch it.
        loader = pkgutil.get_loader(module)
        if loader is not None:
            watch(loader.get_filename())

    # Hand control to wait() now that the target has finished.
    wait()