def _started_processing(self, servlet_name): """Record the fact that we are processing this request. This will log the request's arrival. Once the request completes, be sure to call finished_processing. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.name. """ self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start(self.start_time, name=servlet_name, method=self.get_method()) self.site.access_logger.debug( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.get_method(), self.get_redacted_uri(), )
def _started_processing(self, servlet_name): self.start_time = int(time.time() * 1000) self.request_metrics = RequestMetrics() self.request_metrics.start(self.start_time, name=servlet_name) self.site.access_logger.info("%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.method, self.get_redacted_uri())
def _started_processing(self, servlet_name): """Record the fact that we are processing this request. This will log the request's arrival. Once the request completes, be sure to call finished_processing. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.name. """ self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start( self.start_time, name=servlet_name, method=self.get_method(), ) self.site.access_logger.info( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.get_method(), self.get_redacted_uri() )
def _started_processing(self, servlet_name): self.start_time = int(time.time() * 1000) self.request_metrics = RequestMetrics() self.request_metrics.start(self.start_time, name=servlet_name) self.site.access_logger.info( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.method, self.get_redacted_uri() )
class SynapseRequest(Request): """Class which encapsulates an HTTP request to synapse. All of the requests processed in synapse are of this type. It extends twisted's twisted.web.server.Request, and adds: * Unique request ID * A log context associated with the request * Redaction of access_token query-params in __repr__ * Logging at start and end * Metrics to record CPU, wallclock and DB time by endpoint. It also provides a method `processing`, which returns a context manager. If this method is called, the request won't be logged until the context manager is closed; this is useful for asynchronous request handlers which may go on processing the request even after the client has disconnected. Attributes: logcontext: the log context for this request """ def __init__(self, channel, *args, **kw): Request.__init__(self, channel, *args, **kw) self.site = channel.site self._channel = channel # this is used by the tests self.start_time = 0.0 # The requester, if authenticated. For federation requests this is the # server name, for client requests this is the Requester object. self.requester = None # type: Optional[Union[Requester, str]] # we can't yet create the logcontext, as we don't know the method. self.logcontext = None # type: Optional[LoggingContext] global _next_request_seq self.request_seq = _next_request_seq _next_request_seq += 1 # whether an asynchronous request handler has called processing() self._is_processing = False # the time when the asynchronous request handler completed its processing self._processing_finished_time = None # what time we finished sending the response to the client (or the connection # dropped) self.finish_time = None def __repr__(self): # We overwrite this so that we don't log ``access_token`` return "<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>" % ( self.__class__.__name__, id(self), self.get_method(), self.get_redacted_uri(), self.clientproto.decode("ascii", errors="replace"), self.site.site_tag, ) def get_request_id(self): return "%s-%i" % (self.get_method(), self.request_seq) def get_redacted_uri(self): uri = self.uri if isinstance(uri, bytes): uri = self.uri.decode("ascii", errors="replace") return redact_uri(uri) def get_method(self): """Gets the method associated with the request (or placeholder if not method has yet been received). Note: This is necessary as the placeholder value in twisted is str rather than bytes, so we need to sanitise `self.method`. Returns: str """ method = self.method if isinstance(method, bytes): method = self.method.decode("ascii") return method def render(self, resrc): # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. # create a LogContext for this request request_id = self.get_request_id() self.logcontext = LoggingContext(request_id, request=request_id) # override the Server header which is set by twisted self.setHeader("Server", self.site.server_version_string) with PreserveLoggingContext(self.logcontext): # we start the request metrics timer here with an initial stab # at the servlet name. For most requests that name will be # JsonResource (or a subclass), and JsonResource._async_render # will update it once it picks a servlet. servlet_name = resrc.__class__.__name__ self._started_processing(servlet_name) Request.render(self, resrc) # record the arrival of the request *after* # dispatching to the handler, so that the handler # can update the servlet name in the request # metrics requests_counter.labels(self.get_method(), self.request_metrics.name).inc() @contextlib.contextmanager def processing(self): """Record the fact that we are processing this request. Returns a context manager; the correct way to use this is: async def handle_request(request): with request.processing("FooServlet"): await really_handle_the_request() Once the context manager is closed, the completion of the request will be logged, and the various metrics will be updated. """ if self._is_processing: raise RuntimeError("Request is already processing") self._is_processing = True try: yield except Exception: # this should already have been caught, and sent back to the client as a 500. logger.exception( "Asynchronous message handler raised an uncaught exception") finally: # the request handler has finished its work and either sent the whole response # back, or handed over responsibility to a Producer. self._processing_finished_time = time.time() self._is_processing = False # if we've already sent the response, log it now; otherwise, we wait for the # response to be sent. if self.finish_time is not None: self._finished_processing() def finish(self): """Called when all response data has been written to this Request. Overrides twisted.web.server.Request.finish to record the finish time and do logging. """ self.finish_time = time.time() Request.finish(self) if not self._is_processing: assert self.logcontext is not None with PreserveLoggingContext(self.logcontext): self._finished_processing() def connectionLost(self, reason): """Called when the client connection is closed before the response is written. Overrides twisted.web.server.Request.connectionLost to record the finish time and do logging. """ # There is a bug in Twisted where reason is not wrapped in a Failure object # Detect this and wrap it manually as a workaround # More information: https://github.com/matrix-org/synapse/issues/7441 if not isinstance(reason, Failure): reason = Failure(reason) self.finish_time = time.time() Request.connectionLost(self, reason) if self.logcontext is None: logger.info( "Connection from %s lost before request headers were read", self.client) return # we only get here if the connection to the client drops before we send # the response. # # It's useful to log it here so that we can get an idea of when # the client disconnects. with PreserveLoggingContext(self.logcontext): logger.info("Connection from client lost before response was sent") if not self._is_processing: self._finished_processing() def _started_processing(self, servlet_name): """Record the fact that we are processing this request. This will log the request's arrival. Once the request completes, be sure to call finished_processing. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.name. """ self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start(self.start_time, name=servlet_name, method=self.get_method()) self.site.access_logger.debug( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.get_method(), self.get_redacted_uri(), ) def _finished_processing(self): """Log the completion of this request and update the metrics """ assert self.logcontext is not None usage = self.logcontext.get_resource_usage() if self._processing_finished_time is None: # we completed the request without anything calling processing() self._processing_finished_time = time.time() # the time between receiving the request and the request handler finishing processing_time = self._processing_finished_time - self.start_time # the time between the request handler finishing and the response being sent # to the client (nb may be negative) response_send_time = self.finish_time - self._processing_finished_time # Convert the requester into a string that we can log authenticated_entity = None if isinstance(self.requester, str): authenticated_entity = self.requester elif isinstance(self.requester, Requester): authenticated_entity = self.requester.authenticated_entity # If this is a request where the target user doesn't match the user who # authenticated (e.g. and admin is puppetting a user) then we log both. if self.requester.user.to_string() != authenticated_entity: authenticated_entity = "{},{}".format( authenticated_entity, self.requester.user.to_string(), ) elif self.requester is not None: # This shouldn't happen, but we log it so we don't lose information # and can see that we're doing something wrong. authenticated_entity = repr( self.requester) # type: ignore[unreachable] user_agent = get_request_user_agent(self, "-") code = str(self.code) if not self.finished: # we didn't send the full response before we gave up (presumably because # the connection dropped) code += "!" log_level = logging.INFO if self._should_log_request( ) else logging.DEBUG self.site.access_logger.log( log_level, "%s - %s - {%s}" " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" ' %sB %s "%s %s %s" "%s" [%d dbevts]', self.getClientIP(), self.site.site_tag, authenticated_entity, processing_time, response_send_time, usage.ru_utime, usage.ru_stime, usage.db_sched_duration_sec, usage.db_txn_duration_sec, int(usage.db_txn_count), self.sentLength, code, self.get_method(), self.get_redacted_uri(), self.clientproto.decode("ascii", errors="replace"), user_agent, usage.evt_db_fetch_count, ) try: self.request_metrics.stop(self.finish_time, self.code, self.sentLength) except Exception as e: logger.warning("Failed to stop metrics: %r", e) def _should_log_request(self) -> bool: """Whether we should log at INFO that we processed the request. """ if self.path == b"/health": return False if self.method == b"OPTIONS": return False return True
class SynapseRequest(Request): """Class which encapsulates an HTTP request to synapse. All of the requests processed in synapse are of this type. It extends twisted's twisted.web.server.Request, and adds: * Unique request ID * Redaction of access_token query-params in __repr__ * Logging at start and end * Metrics to record CPU, wallclock and DB time by endpoint. It provides a method `processing` which should be called by the Resource which is handling the request, and returns a context manager. """ def __init__(self, site, *args, **kw): Request.__init__(self, *args, **kw) self.site = site self.authenticated_entity = None self.start_time = 0 global _next_request_seq self.request_seq = _next_request_seq _next_request_seq += 1 def __repr__(self): # We overwrite this so that we don't log ``access_token`` return '<%s at 0x%x method=%s uri=%s clientproto=%s site=%s>' % ( self.__class__.__name__, id(self), self.method, self.get_redacted_uri(), self.clientproto, self.site.site_tag, ) def get_request_id(self): return "%s-%i" % (self.method, self.request_seq) def get_redacted_uri(self): return ACCESS_TOKEN_RE.sub( br'\1<redacted>\3', self.uri ) def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] def render(self, resrc): # override the Server header which is set by twisted self.setHeader("Server", self.site.server_version_string) return Request.render(self, resrc) def _started_processing(self, servlet_name): self.start_time = int(time.time() * 1000) self.request_metrics = RequestMetrics() self.request_metrics.start(self.start_time, name=servlet_name) self.site.access_logger.info( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.method, self.get_redacted_uri() ) def _finished_processing(self): try: context = LoggingContext.current_context() ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count db_txn_duration_ms = context.db_txn_duration_ms db_sched_duration_ms = context.db_sched_duration_ms except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_ms = (0, 0) end_time = int(time.time() * 1000) self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, self.authenticated_entity, end_time - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), db_sched_duration_ms, db_txn_duration_ms, int(db_txn_count), self.sentLength, self.code, self.method, self.get_redacted_uri(), self.clientproto, self.get_user_agent(), ) try: self.request_metrics.stop(end_time, self) except Exception as e: logger.warn("Failed to stop metrics: %r", e) @contextlib.contextmanager def processing(self, servlet_name): """Record the fact that we are processing this request. Returns a context manager; the correct way to use this is: @defer.inlineCallbacks def handle_request(request): with request.processing("FooServlet"): yield really_handle_the_request() This will log the request's arrival. Once the context manager is closed, the completion of the request will be logged, and the various metrics will be updated. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.servlet_name. """ # TODO: we should probably just move this into render() and finish(), # to save having to call a separate method. self._started_processing(servlet_name) yield self._finished_processing()
class SynapseRequest(Request): """Class which encapsulates an HTTP request to synapse. All of the requests processed in synapse are of this type. It extends twisted's twisted.web.server.Request, and adds: * Unique request ID * A log context associated with the request * Redaction of access_token query-params in __repr__ * Logging at start and end * Metrics to record CPU, wallclock and DB time by endpoint. It also provides a method `processing`, which returns a context manager. If this method is called, the request won't be logged until the context manager is closed; this is useful for asynchronous request handlers which may go on processing the request even after the client has disconnected. Attributes: logcontext(LoggingContext) : the log context for this request """ def __init__(self, channel, *args, **kw): Request.__init__(self, channel, *args, **kw) self.site = channel.site self._channel = channel # this is used by the tests self.authenticated_entity = None self.start_time = 0 # we can't yet create the logcontext, as we don't know the method. self.logcontext = None global _next_request_seq self.request_seq = _next_request_seq _next_request_seq += 1 # whether an asynchronous request handler has called processing() self._is_processing = False # the time when the asynchronous request handler completed its processing self._processing_finished_time = None # what time we finished sending the response to the client (or the connection # dropped) self.finish_time = None def __repr__(self): # We overwrite this so that we don't log ``access_token`` return "<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>" % ( self.__class__.__name__, id(self), self.get_method(), self.get_redacted_uri(), self.clientproto.decode("ascii", errors="replace"), self.site.site_tag, ) def get_request_id(self): return "%s-%i" % (self.get_method(), self.request_seq) def get_redacted_uri(self): uri = self.uri if isinstance(uri, bytes): uri = self.uri.decode("ascii", errors="replace") return redact_uri(uri) def get_method(self): """Gets the method associated with the request (or placeholder if not method has yet been received). Note: This is necessary as the placeholder value in twisted is str rather than bytes, so we need to sanitise `self.method`. Returns: str """ method = self.method if isinstance(method, bytes): method = self.method.decode("ascii") return method def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] def render(self, resrc): # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. # create a LogContext for this request request_id = self.get_request_id() logcontext = self.logcontext = LoggingContext(request_id) logcontext.request = request_id # override the Server header which is set by twisted self.setHeader("Server", self.site.server_version_string) with PreserveLoggingContext(self.logcontext): # we start the request metrics timer here with an initial stab # at the servlet name. For most requests that name will be # JsonResource (or a subclass), and JsonResource._async_render # will update it once it picks a servlet. servlet_name = resrc.__class__.__name__ self._started_processing(servlet_name) Request.render(self, resrc) # record the arrival of the request *after* # dispatching to the handler, so that the handler # can update the servlet name in the request # metrics requests_counter.labels(self.get_method(), self.request_metrics.name).inc() @contextlib.contextmanager def processing(self): """Record the fact that we are processing this request. Returns a context manager; the correct way to use this is: @defer.inlineCallbacks def handle_request(request): with request.processing("FooServlet"): yield really_handle_the_request() Once the context manager is closed, the completion of the request will be logged, and the various metrics will be updated. """ if self._is_processing: raise RuntimeError("Request is already processing") self._is_processing = True try: yield except Exception: # this should already have been caught, and sent back to the client as a 500. logger.exception( "Asynchronous messge handler raised an uncaught exception") finally: # the request handler has finished its work and either sent the whole response # back, or handed over responsibility to a Producer. self._processing_finished_time = time.time() self._is_processing = False # if we've already sent the response, log it now; otherwise, we wait for the # response to be sent. if self.finish_time is not None: self._finished_processing() def finish(self): """Called when all response data has been written to this Request. Overrides twisted.web.server.Request.finish to record the finish time and do logging. """ self.finish_time = time.time() Request.finish(self) if not self._is_processing: with PreserveLoggingContext(self.logcontext): self._finished_processing() def connectionLost(self, reason): """Called when the client connection is closed before the response is written. Overrides twisted.web.server.Request.connectionLost to record the finish time and do logging. """ self.finish_time = time.time() Request.connectionLost(self, reason) if self.logcontext is None: logger.info( "Connection from %s lost before request headers were read", self.client) return # we only get here if the connection to the client drops before we send # the response. # # It's useful to log it here so that we can get an idea of when # the client disconnects. with PreserveLoggingContext(self.logcontext): logger.warning("Error processing request %r: %s %s", self, reason.type, reason.value) if not self._is_processing: self._finished_processing() def _started_processing(self, servlet_name): """Record the fact that we are processing this request. This will log the request's arrival. Once the request completes, be sure to call finished_processing. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.name. """ self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start(self.start_time, name=servlet_name, method=self.get_method()) self.site.access_logger.debug( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.get_method(), self.get_redacted_uri(), ) def _finished_processing(self): """Log the completion of this request and update the metrics """ usage = self.logcontext.get_resource_usage() if self._processing_finished_time is None: # we completed the request without anything calling processing() self._processing_finished_time = time.time() # the time between receiving the request and the request handler finishing processing_time = self._processing_finished_time - self.start_time # the time between the request handler finishing and the response being sent # to the client (nb may be negative) response_send_time = self.finish_time - self._processing_finished_time # need to decode as it could be raw utf-8 bytes # from a IDN servname in an auth header authenticated_entity = self.authenticated_entity if authenticated_entity is not None and isinstance( authenticated_entity, bytes): authenticated_entity = authenticated_entity.decode( "utf-8", "replace") # ...or could be raw utf-8 bytes in the User-Agent header. # N.B. if you don't do this, the logger explodes cryptically # with maximum recursion trying to log errors about # the charset problem. # c.f. https://github.com/matrix-org/synapse/issues/3471 user_agent = self.get_user_agent() if user_agent is not None: user_agent = user_agent.decode("utf-8", "replace") else: user_agent = "-" code = str(self.code) if not self.finished: # we didn't send the full response before we gave up (presumably because # the connection dropped) code += "!" self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" ' %sB %s "%s %s %s" "%s" [%d dbevts]', self.getClientIP(), self.site.site_tag, authenticated_entity, processing_time, response_send_time, usage.ru_utime, usage.ru_stime, usage.db_sched_duration_sec, usage.db_txn_duration_sec, int(usage.db_txn_count), self.sentLength, code, self.get_method(), self.get_redacted_uri(), self.clientproto.decode("ascii", errors="replace"), user_agent, usage.evt_db_fetch_count, ) try: self.request_metrics.stop(self.finish_time, self.code, self.sentLength) except Exception as e: logger.warning("Failed to stop metrics: %r", e)
class SynapseRequest(Request): """Class which encapsulates an HTTP request to synapse. All of the requests processed in synapse are of this type. It extends twisted's twisted.web.server.Request, and adds: * Unique request ID * Redaction of access_token query-params in __repr__ * Logging at start and end * Metrics to record CPU, wallclock and DB time by endpoint. It provides a method `processing` which should be called by the Resource which is handling the request, and returns a context manager. """ def __init__(self, site, *args, **kw): Request.__init__(self, *args, **kw) self.site = site self.authenticated_entity = None self.start_time = 0 global _next_request_seq self.request_seq = _next_request_seq _next_request_seq += 1 def __repr__(self): # We overwrite this so that we don't log ``access_token`` return '<%s at 0x%x method=%s uri=%s clientproto=%s site=%s>' % ( self.__class__.__name__, id(self), self.method, self.get_redacted_uri(), self.clientproto, self.site.site_tag, ) def get_request_id(self): return "%s-%i" % (self.method, self.request_seq) def get_redacted_uri(self): return ACCESS_TOKEN_RE.sub(br'\1<redacted>\3', self.uri) def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] def render(self, resrc): # override the Server header which is set by twisted self.setHeader("Server", self.site.server_version_string) return Request.render(self, resrc) def _started_processing(self, servlet_name): self.start_time = int(time.time() * 1000) self.request_metrics = RequestMetrics() self.request_metrics.start( self.start_time, name=servlet_name, method=self.method, ) self.site.access_logger.info("%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.method, self.get_redacted_uri()) def _finished_processing(self): try: context = LoggingContext.current_context() ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count db_txn_duration_ms = context.db_txn_duration_ms db_sched_duration_ms = context.db_sched_duration_ms except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_ms = (0, 0) end_time = int(time.time() * 1000) self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, self.authenticated_entity, end_time - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), db_sched_duration_ms, db_txn_duration_ms, int(db_txn_count), self.sentLength, self.code, self.method, self.get_redacted_uri(), self.clientproto, self.get_user_agent(), ) try: self.request_metrics.stop(end_time, self) except Exception as e: logger.warn("Failed to stop metrics: %r", e) @contextlib.contextmanager def processing(self, servlet_name): """Record the fact that we are processing this request. Returns a context manager; the correct way to use this is: @defer.inlineCallbacks def handle_request(request): with request.processing("FooServlet"): yield really_handle_the_request() This will log the request's arrival. Once the context manager is closed, the completion of the request will be logged, and the various metrics will be updated. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.servlet_name. """ # TODO: we should probably just move this into render() and finish(), # to save having to call a separate method. self._started_processing(servlet_name) yield self._finished_processing()
class SynapseRequest(Request): """Class which encapsulates an HTTP request to synapse. All of the requests processed in synapse are of this type. It extends twisted's twisted.web.server.Request, and adds: * Unique request ID * Redaction of access_token query-params in __repr__ * Logging at start and end * Metrics to record CPU, wallclock and DB time by endpoint. It provides a method `processing` which should be called by the Resource which is handling the request, and returns a context manager. """ def __init__(self, site, *args, **kw): Request.__init__(self, *args, **kw) self.site = site self.authenticated_entity = None self.start_time = 0 global _next_request_seq self.request_seq = _next_request_seq _next_request_seq += 1 def __repr__(self): # We overwrite this so that we don't log ``access_token`` return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % ( self.__class__.__name__, id(self), self.method, self.get_redacted_uri(), self.clientproto, self.site.site_tag, ) def get_request_id(self): return "%s-%i" % (self.method, self.request_seq) def get_redacted_uri(self): return redact_uri(self.uri) def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] def render(self, resrc): # override the Server header which is set by twisted self.setHeader("Server", self.site.server_version_string) return Request.render(self, resrc) def _started_processing(self, servlet_name): self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start( self.start_time, name=servlet_name, method=self.method, ) self.site.access_logger.info( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.method, self.get_redacted_uri() ) def _finished_processing(self): try: context = LoggingContext.current_context() ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count db_txn_duration_sec = context.db_txn_duration_sec db_sched_duration_sec = context.db_sched_duration_sec evt_db_fetch_count = context.evt_db_fetch_count except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_sec = (0, 0) evt_db_fetch_count = 0 end_time = time.time() # need to decode as it could be raw utf-8 bytes # from a IDN servname in an auth header authenticated_entity = self.authenticated_entity if authenticated_entity is not None: authenticated_entity = authenticated_entity.decode("utf-8", "replace") # ...or could be raw utf-8 bytes in the User-Agent header. # N.B. if you don't do this, the logger explodes cryptically # with maximum recursion trying to log errors about # the charset problem. # c.f. https://github.com/matrix-org/synapse/issues/3471 user_agent = self.get_user_agent() if user_agent is not None: user_agent = user_agent.decode("utf-8", "replace") self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]", self.getClientIP(), self.site.site_tag, authenticated_entity, end_time - self.start_time, ru_utime, ru_stime, db_sched_duration_sec, db_txn_duration_sec, int(db_txn_count), self.sentLength, self.code, self.method, self.get_redacted_uri(), self.clientproto, user_agent, evt_db_fetch_count, ) try: self.request_metrics.stop(end_time, self) except Exception as e: logger.warn("Failed to stop metrics: %r", e) @contextlib.contextmanager def processing(self, servlet_name): """Record the fact that we are processing this request. Returns a context manager; the correct way to use this is: @defer.inlineCallbacks def handle_request(request): with request.processing("FooServlet"): yield really_handle_the_request() This will log the request's arrival. Once the context manager is closed, the completion of the request will be logged, and the various metrics will be updated. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.servlet_name. """ # TODO: we should probably just move this into render() and finish(), # to save having to call a separate method. self._started_processing(servlet_name) yield self._finished_processing()
class SynapseRequest(Request): """Class which encapsulates an HTTP request to synapse. All of the requests processed in synapse are of this type. It extends twisted's twisted.web.server.Request, and adds: * Unique request ID * A log context associated with the request * Redaction of access_token query-params in __repr__ * Logging at start and end * Metrics to record CPU, wallclock and DB time by endpoint. * A limit to the size of request which will be accepted It also provides a method `processing`, which returns a context manager. If this method is called, the request won't be logged until the context manager is closed; this is useful for asynchronous request handlers which may go on processing the request even after the client has disconnected. Attributes: logcontext: the log context for this request """ def __init__( self, channel: HTTPChannel, site: "SynapseSite", *args: Any, max_request_body_size: int = 1024, **kw: Any, ): super().__init__(channel, *args, **kw) self._max_request_body_size = max_request_body_size self.synapse_site = site self.reactor = site.reactor self._channel = channel # this is used by the tests self.start_time = 0.0 # The requester, if authenticated. For federation requests this is the # server name, for client requests this is the Requester object. self._requester: Optional[Union[Requester, str]] = None # An opentracing span for this request. Will be closed when the request is # completely processed. self._opentracing_span: "Optional[opentracing.Span]" = None # we can't yet create the logcontext, as we don't know the method. self.logcontext: Optional[LoggingContext] = None # The `Deferred` to cancel if the client disconnects early and # `is_render_cancellable` is set. Expected to be set by `Resource.render`. self.render_deferred: Optional["Deferred[None]"] = None # A boolean indicating whether `render_deferred` should be cancelled if the # client disconnects early. Expected to be set by the coroutine started by # `Resource.render`, if rendering is asynchronous. self.is_render_cancellable = False global _next_request_seq self.request_seq = _next_request_seq _next_request_seq += 1 # whether an asynchronous request handler has called processing() self._is_processing = False # the time when the asynchronous request handler completed its processing self._processing_finished_time: Optional[float] = None # what time we finished sending the response to the client (or the connection # dropped) self.finish_time: Optional[float] = None def __repr__(self) -> str: # We overwrite this so that we don't log ``access_token`` return "<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>" % ( self.__class__.__name__, id(self), self.get_method(), self.get_redacted_uri(), self.clientproto.decode("ascii", errors="replace"), self.synapse_site.site_tag, ) def handleContentChunk(self, data: bytes) -> None: # we should have a `content` by now. assert self.content, "handleContentChunk() called before gotLength()" if self.content.tell() + len(data) > self._max_request_body_size: logger.warning( "Aborting connection from %s because the request exceeds maximum size: %s %s", self.client, self.get_method(), self.get_redacted_uri(), ) self.transport.abortConnection() return super().handleContentChunk(data) @property def requester(self) -> Optional[Union[Requester, str]]: return self._requester @requester.setter def requester(self, value: Union[Requester, str]) -> None: # Store the requester, and update some properties based on it. # This should only be called once. assert self._requester is None self._requester = value # A logging context should exist by now (and have a ContextRequest). assert self.logcontext is not None assert self.logcontext.request is not None ( requester, authenticated_entity, ) = self.get_authenticated_entity() self.logcontext.request.requester = requester # If there's no authenticated entity, it was the requester. self.logcontext.request.authenticated_entity = authenticated_entity or requester def set_opentracing_span(self, span: "opentracing.Span") -> None: """attach an opentracing span to this request Doing so will cause the span to be closed when we finish processing the request """ self._opentracing_span = span def get_request_id(self) -> str: return "%s-%i" % (self.get_method(), self.request_seq) def get_redacted_uri(self) -> str: """Gets the redacted URI associated with the request (or placeholder if the URI has not yet been received). Note: This is necessary as the placeholder value in twisted is str rather than bytes, so we need to sanitise `self.uri`. Returns: The redacted URI as a string. """ uri: Union[bytes, str] = self.uri if isinstance(uri, bytes): uri = uri.decode("ascii", errors="replace") return redact_uri(uri) def get_method(self) -> str: """Gets the method associated with the request (or placeholder if method has not yet been received). Note: This is necessary as the placeholder value in twisted is str rather than bytes, so we need to sanitise `self.method`. Returns: The request method as a string. """ method: Union[bytes, str] = self.method if isinstance(method, bytes): return self.method.decode("ascii") return method def get_authenticated_entity(self) -> Tuple[Optional[str], Optional[str]]: """ Get the "authenticated" entity of the request, which might be the user performing the action, or a user being puppeted by a server admin. Returns: A tuple: The first item is a string representing the user making the request. The second item is a string or None representing the user who authenticated when making this request. See Requester.authenticated_entity. """ # Convert the requester into a string that we can log if isinstance(self._requester, str): return self._requester, None elif isinstance(self._requester, Requester): requester = self._requester.user.to_string() authenticated_entity = self._requester.authenticated_entity # If this is a request where the target user doesn't match the user who # authenticated (e.g. and admin is puppetting a user) then we return both. if self._requester.user.to_string() != authenticated_entity: return requester, authenticated_entity return requester, None elif self._requester is not None: # This shouldn't happen, but we log it so we don't lose information # and can see that we're doing something wrong. return repr(self._requester), None # type: ignore[unreachable] return None, None def render(self, resrc: Resource) -> None: # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. # create a LogContext for this request request_id = self.get_request_id() self.logcontext = LoggingContext( request_id, request=ContextRequest( request_id=request_id, ip_address=self.getClientAddress().host, site_tag=self.synapse_site.site_tag, # The requester is going to be unknown at this point. requester=None, authenticated_entity=None, method=self.get_method(), url=self.get_redacted_uri(), protocol=self.clientproto.decode("ascii", errors="replace"), user_agent=get_request_user_agent(self), ), ) # override the Server header which is set by twisted self.setHeader("Server", self.synapse_site.server_version_string) with PreserveLoggingContext(self.logcontext): # we start the request metrics timer here with an initial stab # at the servlet name. For most requests that name will be # JsonResource (or a subclass), and JsonResource._async_render # will update it once it picks a servlet. servlet_name = resrc.__class__.__name__ self._started_processing(servlet_name) Request.render(self, resrc) # record the arrival of the request *after* # dispatching to the handler, so that the handler # can update the servlet name in the request # metrics requests_counter.labels(self.get_method(), self.request_metrics.name).inc() @contextlib.contextmanager def processing(self) -> Generator[None, None, None]: """Record the fact that we are processing this request. Returns a context manager; the correct way to use this is: async def handle_request(request): with request.processing("FooServlet"): await really_handle_the_request() Once the context manager is closed, the completion of the request will be logged, and the various metrics will be updated. """ if self._is_processing: raise RuntimeError("Request is already processing") self._is_processing = True try: yield except Exception: # this should already have been caught, and sent back to the client as a 500. logger.exception( "Asynchronous message handler raised an uncaught exception") finally: # the request handler has finished its work and either sent the whole response # back, or handed over responsibility to a Producer. self._processing_finished_time = time.time() self._is_processing = False if self._opentracing_span: self._opentracing_span.log_kv({"event": "finished processing"}) # if we've already sent the response, log it now; otherwise, we wait for the # response to be sent. if self.finish_time is not None: self._finished_processing() def finish(self) -> None: """Called when all response data has been written to this Request. Overrides twisted.web.server.Request.finish to record the finish time and do logging. """ self.finish_time = time.time() Request.finish(self) if self._opentracing_span: self._opentracing_span.log_kv({"event": "response sent"}) if not self._is_processing: assert self.logcontext is not None with PreserveLoggingContext(self.logcontext): self._finished_processing() def connectionLost(self, reason: Union[Failure, Exception]) -> None: """Called when the client connection is closed before the response is written. Overrides twisted.web.server.Request.connectionLost to record the finish time and do logging. """ # There is a bug in Twisted where reason is not wrapped in a Failure object # Detect this and wrap it manually as a workaround # More information: https://github.com/matrix-org/synapse/issues/7441 if not isinstance(reason, Failure): reason = Failure(reason) self.finish_time = time.time() Request.connectionLost(self, reason) if self.logcontext is None: logger.info( "Connection from %s lost before request headers were read", self.client) return # we only get here if the connection to the client drops before we send # the response. # # It's useful to log it here so that we can get an idea of when # the client disconnects. with PreserveLoggingContext(self.logcontext): logger.info("Connection from client lost before response was sent") if self._opentracing_span: self._opentracing_span.log_kv({ "event": "client connection lost", "reason": str(reason.value) }) if self._is_processing: if self.is_render_cancellable: if self.render_deferred is not None: # Throw a cancellation into the request processing, in the hope # that it will finish up sooner than it normally would. # The `self.processing()` context manager will call # `_finished_processing()` when done. with PreserveLoggingContext(): self.render_deferred.cancel() else: logger.error( "Connection from client lost, but have no Deferred to " "cancel even though the request is marked as cancellable." ) else: self._finished_processing() def _started_processing(self, servlet_name: str) -> None: """Record the fact that we are processing this request. This will log the request's arrival. Once the request completes, be sure to call finished_processing. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.name. """ self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start(self.start_time, name=servlet_name, method=self.get_method()) self.synapse_site.access_logger.debug( "%s - %s - Received request: %s %s", self.getClientAddress().host, self.synapse_site.site_tag, self.get_method(), self.get_redacted_uri(), ) def _finished_processing(self) -> None: """Log the completion of this request and update the metrics""" assert self.logcontext is not None assert self.finish_time is not None usage = self.logcontext.get_resource_usage() if self._processing_finished_time is None: # we completed the request without anything calling processing() self._processing_finished_time = time.time() # the time between receiving the request and the request handler finishing processing_time = self._processing_finished_time - self.start_time # the time between the request handler finishing and the response being sent # to the client (nb may be negative) response_send_time = self.finish_time - self._processing_finished_time user_agent = get_request_user_agent(self, "-") # int(self.code) looks redundant, because self.code is already an int. # But self.code might be an HTTPStatus (which inherits from int)---which has # a different string representation. So ensure we really have an integer. code = str(int(self.code)) if not self.finished: # we didn't send the full response before we gave up (presumably because # the connection dropped) code += "!" log_level = logging.INFO if self._should_log_request( ) else logging.DEBUG # If this is a request where the target user doesn't match the user who # authenticated (e.g. and admin is puppetting a user) then we log both. requester, authenticated_entity = self.get_authenticated_entity() if authenticated_entity: requester = f"{authenticated_entity}|{requester}" self.synapse_site.access_logger.log( log_level, "%s - %s - {%s}" " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" ' %sB %s "%s %s %s" "%s" [%d dbevts]', self.getClientAddress().host, self.synapse_site.site_tag, requester, processing_time, response_send_time, usage.ru_utime, usage.ru_stime, usage.db_sched_duration_sec, usage.db_txn_duration_sec, int(usage.db_txn_count), self.sentLength, code, self.get_method(), self.get_redacted_uri(), self.clientproto.decode("ascii", errors="replace"), user_agent, usage.evt_db_fetch_count, ) # complete the opentracing span, if any. if self._opentracing_span: self._opentracing_span.finish() try: self.request_metrics.stop(self.finish_time, self.code, self.sentLength) except Exception as e: logger.warning("Failed to stop metrics: %r", e) def _should_log_request(self) -> bool: """Whether we should log at INFO that we processed the request.""" if self.path == b"/health": return False if self.method == b"OPTIONS": return False return True
class SynapseRequest(Request): """Class which encapsulates an HTTP request to synapse. All of the requests processed in synapse are of this type. It extends twisted's twisted.web.server.Request, and adds: * Unique request ID * A log context associated with the request * Redaction of access_token query-params in __repr__ * Logging at start and end * Metrics to record CPU, wallclock and DB time by endpoint. It also provides a method `processing`, which returns a context manager. If this method is called, the request won't be logged until the context manager is closed; this is useful for asynchronous request handlers which may go on processing the request even after the client has disconnected. Attributes: logcontext(LoggingContext) : the log context for this request """ def __init__(self, site, channel, *args, **kw): Request.__init__(self, channel, *args, **kw) self.site = site self._channel = channel # this is used by the tests self.authenticated_entity = None self.start_time = 0 # we can't yet create the logcontext, as we don't know the method. self.logcontext = None global _next_request_seq self.request_seq = _next_request_seq _next_request_seq += 1 # whether an asynchronous request handler has called processing() self._is_processing = False # the time when the asynchronous request handler completed its processing self._processing_finished_time = None # what time we finished sending the response to the client (or the connection # dropped) self.finish_time = None def __repr__(self): # We overwrite this so that we don't log ``access_token`` return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % ( self.__class__.__name__, id(self), self.get_method(), self.get_redacted_uri(), self.clientproto.decode('ascii', errors='replace'), self.site.site_tag, ) def get_request_id(self): return "%s-%i" % (self.get_method(), self.request_seq) def get_redacted_uri(self): uri = self.uri if isinstance(uri, bytes): uri = self.uri.decode('ascii') return redact_uri(uri) def get_method(self): """Gets the method associated with the request (or placeholder if not method has yet been received). Note: This is necessary as the placeholder value in twisted is str rather than bytes, so we need to sanitise `self.method`. Returns: str """ method = self.method if isinstance(method, bytes): method = self.method.decode('ascii') return method def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] def render(self, resrc): # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. # create a LogContext for this request request_id = self.get_request_id() logcontext = self.logcontext = LoggingContext(request_id) logcontext.request = request_id # override the Server header which is set by twisted self.setHeader("Server", self.site.server_version_string) with PreserveLoggingContext(self.logcontext): # we start the request metrics timer here with an initial stab # at the servlet name. For most requests that name will be # JsonResource (or a subclass), and JsonResource._async_render # will update it once it picks a servlet. servlet_name = resrc.__class__.__name__ self._started_processing(servlet_name) Request.render(self, resrc) # record the arrival of the request *after* # dispatching to the handler, so that the handler # can update the servlet name in the request # metrics requests_counter.labels(self.get_method(), self.request_metrics.name).inc() @contextlib.contextmanager def processing(self): """Record the fact that we are processing this request. Returns a context manager; the correct way to use this is: @defer.inlineCallbacks def handle_request(request): with request.processing("FooServlet"): yield really_handle_the_request() Once the context manager is closed, the completion of the request will be logged, and the various metrics will be updated. """ if self._is_processing: raise RuntimeError("Request is already processing") self._is_processing = True try: yield except Exception: # this should already have been caught, and sent back to the client as a 500. logger.exception("Asynchronous messge handler raised an uncaught exception") finally: # the request handler has finished its work and either sent the whole response # back, or handed over responsibility to a Producer. self._processing_finished_time = time.time() self._is_processing = False # if we've already sent the response, log it now; otherwise, we wait for the # response to be sent. if self.finish_time is not None: self._finished_processing() def finish(self): """Called when all response data has been written to this Request. Overrides twisted.web.server.Request.finish to record the finish time and do logging. """ self.finish_time = time.time() Request.finish(self) if not self._is_processing: with PreserveLoggingContext(self.logcontext): self._finished_processing() def connectionLost(self, reason): """Called when the client connection is closed before the response is written. Overrides twisted.web.server.Request.connectionLost to record the finish time and do logging. """ self.finish_time = time.time() Request.connectionLost(self, reason) # we only get here if the connection to the client drops before we send # the response. # # It's useful to log it here so that we can get an idea of when # the client disconnects. with PreserveLoggingContext(self.logcontext): logger.warn( "Error processing request %r: %s %s", self, reason.type, reason.value, ) if not self._is_processing: self._finished_processing() def _started_processing(self, servlet_name): """Record the fact that we are processing this request. This will log the request's arrival. Once the request completes, be sure to call finished_processing. Args: servlet_name (str): the name of the servlet which will be processing this request. This is used in the metrics. It is possible to update this afterwards by updating self.request_metrics.name. """ self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start( self.start_time, name=servlet_name, method=self.get_method(), ) self.site.access_logger.info( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, self.get_method(), self.get_redacted_uri() ) def _finished_processing(self): """Log the completion of this request and update the metrics """ if self.logcontext is None: # this can happen if the connection closed before we read the # headers (so render was never called). In that case we'll already # have logged a warning, so just bail out. return usage = self.logcontext.get_resource_usage() if self._processing_finished_time is None: # we completed the request without anything calling processing() self._processing_finished_time = time.time() # the time between receiving the request and the request handler finishing processing_time = self._processing_finished_time - self.start_time # the time between the request handler finishing and the response being sent # to the client (nb may be negative) response_send_time = self.finish_time - self._processing_finished_time # need to decode as it could be raw utf-8 bytes # from a IDN servname in an auth header authenticated_entity = self.authenticated_entity if authenticated_entity is not None and isinstance(authenticated_entity, bytes): authenticated_entity = authenticated_entity.decode("utf-8", "replace") # ...or could be raw utf-8 bytes in the User-Agent header. # N.B. if you don't do this, the logger explodes cryptically # with maximum recursion trying to log errors about # the charset problem. # c.f. https://github.com/matrix-org/synapse/issues/3471 user_agent = self.get_user_agent() if user_agent is not None: user_agent = user_agent.decode("utf-8", "replace") else: user_agent = "-" code = str(self.code) if not self.finished: # we didn't send the full response before we gave up (presumably because # the connection dropped) code += "!" self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]", self.getClientIP(), self.site.site_tag, authenticated_entity, processing_time, response_send_time, usage.ru_utime, usage.ru_stime, usage.db_sched_duration_sec, usage.db_txn_duration_sec, int(usage.db_txn_count), self.sentLength, code, self.get_method(), self.get_redacted_uri(), self.clientproto.decode('ascii', errors='replace'), user_agent, usage.evt_db_fetch_count, ) try: self.request_metrics.stop(self.finish_time, self.code, self.sentLength) except Exception as e: logger.warn("Failed to stop metrics: %r", e)