def record_request_restart_data(log_data): if settings.PROFILE_ALL_REQUESTS: log_data["prof"].enable() log_data['time_restarted'] = time.time() log_data['remote_cache_time_restarted'] = get_remote_cache_time() log_data['remote_cache_requests_restarted'] = get_remote_cache_requests() log_data['bugdown_time_restarted'] = get_bugdown_time() log_data['bugdown_requests_restarted'] = get_bugdown_requests()
def record_request_stop_data(log_data): log_data['time_stopped'] = time.time() log_data['remote_cache_time_stopped'] = get_remote_cache_time() log_data['remote_cache_requests_stopped'] = get_remote_cache_requests() log_data['bugdown_time_stopped'] = get_bugdown_time() log_data['bugdown_requests_stopped'] = get_bugdown_requests() if settings.PROFILE_ALL_REQUESTS: log_data["prof"].disable()
def record_request_restart_data(log_data): # type: (MutableMapping[str, Any]) -> None if settings.PROFILE_ALL_REQUESTS: log_data["prof"].enable() log_data['time_restarted'] = time.time() log_data['remote_cache_time_restarted'] = get_remote_cache_time() log_data['remote_cache_requests_restarted'] = get_remote_cache_requests() log_data['bugdown_time_restarted'] = get_bugdown_time() log_data['bugdown_requests_restarted'] = get_bugdown_requests()
def record_request_start_data(log_data): if settings.PROFILE_ALL_REQUESTS: log_data["prof"] = cProfile.Profile() log_data["prof"].enable() log_data['time_started'] = time.time() log_data['memcached_time_start'] = get_memcached_time() log_data['memcached_requests_start'] = get_memcached_requests() log_data['bugdown_time_start'] = get_bugdown_time() log_data['bugdown_requests_start'] = get_bugdown_requests()
def record_request_start_data(log_data: MutableMapping[str, Any]) -> None: if settings.PROFILE_ALL_REQUESTS: log_data["prof"] = cProfile.Profile() log_data["prof"].enable() reset_queries() log_data['time_started'] = time.time() log_data['remote_cache_time_start'] = get_remote_cache_time() log_data['remote_cache_requests_start'] = get_remote_cache_requests() log_data['bugdown_time_start'] = get_bugdown_time() log_data['bugdown_requests_start'] = get_bugdown_requests()
def write_log_line(log_data, path, method, remote_ip, email, client_name, status_code=200, error_content=None, error_content_iter=None): # type: (MutableMapping[str, Any], Text, str, str, Text, Text, int, Optional[AnyStr], Optional[Iterable[AnyStr]]) -> None assert error_content is None or error_content_iter is None if error_content is not None: error_content_iter = (error_content,) # For statsd timer name if path == '/': statsd_path = u'webreq' else: statsd_path = u"webreq.%s" % (path[1:].replace('/', '.'),) # Remove non-ascii chars from path (there should be none, if there are it's # because someone manually entered a nonexistant path), as UTF-8 chars make # statsd sad when it sends the key name over the socket statsd_path = statsd_path.encode('ascii', errors='ignore').decode("ascii") blacklisted_requests = ['do_confirm', 'send_confirm', 'eventslast_event_id', 'webreq.content', 'avatar', 'user_uploads', 'password.reset', 'static', 'json.bots', 'json.users', 'json.streams', 'accounts.unsubscribe', 'apple-touch-icon', 'emoji', 'json.bots', 'upload_file', 'realm_activity', 'user_activity'] suppress_statsd = any((blacklisted in statsd_path for blacklisted in blacklisted_requests)) time_delta = -1 # A time duration of -1 means the StartLogRequests middleware # didn't run for some reason optional_orig_delta = "" if 'time_started' in log_data: time_delta = time.time() - log_data['time_started'] if 'time_stopped' in log_data: orig_time_delta = time_delta time_delta = ((log_data['time_stopped'] - log_data['time_started']) + (time.time() - log_data['time_restarted'])) optional_orig_delta = " (lp: %s)" % (format_timedelta(orig_time_delta),) remote_cache_output = "" if 'remote_cache_time_start' in log_data: remote_cache_time_delta = get_remote_cache_time() - log_data['remote_cache_time_start'] remote_cache_count_delta = get_remote_cache_requests() - log_data['remote_cache_requests_start'] if 'remote_cache_requests_stopped' in log_data: # (now - restarted) + (stopped - start) = (now - start) + (stopped - restarted) remote_cache_time_delta += (log_data['remote_cache_time_stopped'] - log_data['remote_cache_time_restarted']) remote_cache_count_delta += (log_data['remote_cache_requests_stopped'] - log_data['remote_cache_requests_restarted']) if (remote_cache_time_delta > 0.005): remote_cache_output = " (mem: %s/%s)" % (format_timedelta(remote_cache_time_delta), remote_cache_count_delta) if not suppress_statsd: statsd.timing("%s.remote_cache.time" % (statsd_path,), timedelta_ms(remote_cache_time_delta)) statsd.incr("%s.remote_cache.querycount" % (statsd_path,), remote_cache_count_delta) startup_output = "" if 'startup_time_delta' in log_data and log_data["startup_time_delta"] > 0.005: startup_output = " (+start: %s)" % (format_timedelta(log_data["startup_time_delta"])) bugdown_output = "" if 'bugdown_time_start' in log_data: bugdown_time_delta = get_bugdown_time() - log_data['bugdown_time_start'] bugdown_count_delta = get_bugdown_requests() - log_data['bugdown_requests_start'] if 'bugdown_requests_stopped' in log_data: # (now - restarted) + (stopped - start) = (now - start) + (stopped - restarted) bugdown_time_delta += (log_data['bugdown_time_stopped'] - log_data['bugdown_time_restarted']) bugdown_count_delta += (log_data['bugdown_requests_stopped'] - log_data['bugdown_requests_restarted']) if (bugdown_time_delta > 0.005): bugdown_output = " (md: %s/%s)" % (format_timedelta(bugdown_time_delta), bugdown_count_delta) if not suppress_statsd: statsd.timing("%s.markdown.time" % (statsd_path,), timedelta_ms(bugdown_time_delta)) statsd.incr("%s.markdown.count" % (statsd_path,), bugdown_count_delta) # Get the amount of time spent doing database queries db_time_output = "" queries = connection.connection.queries if connection.connection is not None else [] if len(queries) > 0: query_time = sum(float(query.get('time', 0)) for query in queries) db_time_output = " (db: %s/%sq)" % (format_timedelta(query_time), len(queries)) if not suppress_statsd: # Log ms, db ms, and num queries to statsd statsd.timing("%s.dbtime" % (statsd_path,), timedelta_ms(query_time)) statsd.incr("%s.dbq" % (statsd_path,), len(queries)) statsd.timing("%s.total" % (statsd_path,), timedelta_ms(time_delta)) if 'extra' in log_data: extra_request_data = " %s" % (log_data['extra'],) else: extra_request_data = "" logger_client = "(%s via %s)" % (email, client_name) logger_timing = ('%5s%s%s%s%s%s %s' % (format_timedelta(time_delta), optional_orig_delta, remote_cache_output, bugdown_output, db_time_output, startup_output, path)) logger_line = ('%-15s %-7s %3d %s%s %s' % (remote_ip, method, status_code, logger_timing, extra_request_data, logger_client)) if (status_code in [200, 304] and method == "GET" and path.startswith("/static")): logger.debug(logger_line) else: logger.info(logger_line) if (is_slow_query(time_delta, path)): queue_json_publish("slow_queries", "%s (%s)" % (logger_line, email), lambda e: None) if settings.PROFILE_ALL_REQUESTS: log_data["prof"].disable() profile_path = "/tmp/profile.data.%s.%s" % (path.split("/")[-1], int(time_delta * 1000),) log_data["prof"].dump_stats(profile_path) # Log some additional data whenever we return certain 40x errors if 400 <= status_code < 500 and status_code not in [401, 404, 405]: assert error_content_iter is not None error_content_list = list(error_content_iter) if error_content_list: error_data = u'' elif isinstance(error_content_list[0], Text): error_data = u''.join(error_content_list) elif isinstance(error_content_list[0], binary_type): error_data = repr(b''.join(error_content_list)) if len(error_data) > 100: error_data = u"[content more than 100 characters]" logger.info('status=%3d, data=%s, uid=%s' % (status_code, error_data, email))
def write_log_line(log_data, path, method, remote_ip, email, client_name, status_code=200, error_content=None, error_content_iter=None): # type: (MutableMapping[str, Any], Text, str, str, Text, Text, int, Optional[AnyStr], Optional[Iterable[AnyStr]]) -> None assert error_content is None or error_content_iter is None if error_content is not None: error_content_iter = (error_content, ) # For statsd timer name if path == '/': statsd_path = u'webreq' else: statsd_path = u"webreq.%s" % (path[1:].replace('/', '.'), ) # Remove non-ascii chars from path (there should be none, if there are it's # because someone manually entered a nonexistant path), as UTF-8 chars make # statsd sad when it sends the key name over the socket statsd_path = statsd_path.encode('ascii', errors='ignore').decode("ascii") blacklisted_requests = [ 'do_confirm', 'send_confirm', 'eventslast_event_id', 'webreq.content', 'avatar', 'user_uploads', 'password.reset', 'static', 'json.bots', 'json.users', 'json.streams', 'accounts.unsubscribe', 'apple-touch-icon', 'emoji', 'json.bots', 'upload_file', 'realm_activity', 'user_activity' ] suppress_statsd = any( (blacklisted in statsd_path for blacklisted in blacklisted_requests)) time_delta = -1 # A time duration of -1 means the StartLogRequests middleware # didn't run for some reason optional_orig_delta = "" if 'time_started' in log_data: time_delta = time.time() - log_data['time_started'] if 'time_stopped' in log_data: orig_time_delta = time_delta time_delta = ((log_data['time_stopped'] - log_data['time_started']) + (time.time() - log_data['time_restarted'])) optional_orig_delta = " (lp: %s)" % ( format_timedelta(orig_time_delta), ) remote_cache_output = "" if 'remote_cache_time_start' in log_data: remote_cache_time_delta = get_remote_cache_time( ) - log_data['remote_cache_time_start'] remote_cache_count_delta = get_remote_cache_requests( ) - log_data['remote_cache_requests_start'] if 'remote_cache_requests_stopped' in log_data: # (now - restarted) + (stopped - start) = (now - start) + (stopped - restarted) remote_cache_time_delta += ( log_data['remote_cache_time_stopped'] - log_data['remote_cache_time_restarted']) remote_cache_count_delta += ( log_data['remote_cache_requests_stopped'] - log_data['remote_cache_requests_restarted']) if (remote_cache_time_delta > 0.005): remote_cache_output = " (mem: %s/%s)" % (format_timedelta( remote_cache_time_delta), remote_cache_count_delta) if not suppress_statsd: statsd.timing("%s.remote_cache.time" % (statsd_path, ), timedelta_ms(remote_cache_time_delta)) statsd.incr("%s.remote_cache.querycount" % (statsd_path, ), remote_cache_count_delta) startup_output = "" if 'startup_time_delta' in log_data and log_data[ "startup_time_delta"] > 0.005: startup_output = " (+start: %s)" % (format_timedelta( log_data["startup_time_delta"])) bugdown_output = "" if 'bugdown_time_start' in log_data: bugdown_time_delta = get_bugdown_time( ) - log_data['bugdown_time_start'] bugdown_count_delta = get_bugdown_requests( ) - log_data['bugdown_requests_start'] if 'bugdown_requests_stopped' in log_data: # (now - restarted) + (stopped - start) = (now - start) + (stopped - restarted) bugdown_time_delta += (log_data['bugdown_time_stopped'] - log_data['bugdown_time_restarted']) bugdown_count_delta += (log_data['bugdown_requests_stopped'] - log_data['bugdown_requests_restarted']) if (bugdown_time_delta > 0.005): bugdown_output = " (md: %s/%s)" % ( format_timedelta(bugdown_time_delta), bugdown_count_delta) if not suppress_statsd: statsd.timing("%s.markdown.time" % (statsd_path, ), timedelta_ms(bugdown_time_delta)) statsd.incr("%s.markdown.count" % (statsd_path, ), bugdown_count_delta) # Get the amount of time spent doing database queries db_time_output = "" queries = connection.connection.queries if connection.connection is not None else [] if len(queries) > 0: query_time = sum(float(query.get('time', 0)) for query in queries) db_time_output = " (db: %s/%sq)" % (format_timedelta(query_time), len(queries)) if not suppress_statsd: # Log ms, db ms, and num queries to statsd statsd.timing("%s.dbtime" % (statsd_path, ), timedelta_ms(query_time)) statsd.incr("%s.dbq" % (statsd_path, ), len(queries)) statsd.timing("%s.total" % (statsd_path, ), timedelta_ms(time_delta)) if 'extra' in log_data: extra_request_data = " %s" % (log_data['extra'], ) else: extra_request_data = "" logger_client = "(%s via %s)" % (email, client_name) logger_timing = ('%5s%s%s%s%s%s %s' % (format_timedelta(time_delta), optional_orig_delta, remote_cache_output, bugdown_output, db_time_output, startup_output, path)) logger_line = ('%-15s %-7s %3d %s%s %s' % (remote_ip, method, status_code, logger_timing, extra_request_data, logger_client)) if (status_code in [200, 304] and method == "GET" and path.startswith("/static")): logger.debug(logger_line) else: logger.info(logger_line) if (is_slow_query(time_delta, path)): # Since the slow query worker patches code, we can't directly # use call_consume_in_tests here without further work. queue_json_publish("slow_queries", "%s (%s)" % (logger_line, email), lambda e: None) if settings.PROFILE_ALL_REQUESTS: log_data["prof"].disable() profile_path = "/tmp/profile.data.%s.%s" % ( path.split("/")[-1], int(time_delta * 1000), ) log_data["prof"].dump_stats(profile_path) # Log some additional data whenever we return certain 40x errors if 400 <= status_code < 500 and status_code not in [401, 404, 405]: assert error_content_iter is not None error_content_list = list(error_content_iter) if error_content_list: error_data = u'' elif isinstance(error_content_list[0], Text): error_data = u''.join(error_content_list) elif isinstance(error_content_list[0], binary_type): error_data = repr(b''.join(error_content_list)) if len(error_data) > 100: error_data = u"[content more than 100 characters]" logger.info('status=%3d, data=%s, uid=%s' % (status_code, error_data, email))
def write_log_line( log_data: MutableMapping[str, Any], path: str, method: str, remote_ip: str, requestor_for_logs: str, client_name: str, status_code: int = 200, error_content: Optional[AnyStr] = None, error_content_iter: Optional[Iterable[AnyStr]] = None) -> None: assert error_content is None or error_content_iter is None if error_content is not None: error_content_iter = (error_content, ) if settings.STATSD_HOST != '': # For statsd timer name if path == '/': statsd_path = 'webreq' else: statsd_path = "webreq.%s" % (path[1:].replace('/', '.'), ) # Remove non-ascii chars from path (there should be none, if there are it's # because someone manually entered a nonexistent path), as UTF-8 chars make # statsd sad when it sends the key name over the socket statsd_path = statsd_path.encode('ascii', errors='ignore').decode("ascii") # TODO: This could probably be optimized to use a regular expression rather than a loop. suppress_statsd = any(blacklisted in statsd_path for blacklisted in statsd_blacklisted_requests) else: suppress_statsd = True statsd_path = '' time_delta = -1 # A time duration of -1 means the StartLogRequests middleware # didn't run for some reason optional_orig_delta = "" if 'time_started' in log_data: time_delta = time.time() - log_data['time_started'] if 'time_stopped' in log_data: orig_time_delta = time_delta time_delta = ((log_data['time_stopped'] - log_data['time_started']) + (time.time() - log_data['time_restarted'])) optional_orig_delta = " (lp: %s)" % ( format_timedelta(orig_time_delta), ) remote_cache_output = "" if 'remote_cache_time_start' in log_data: remote_cache_time_delta = get_remote_cache_time( ) - log_data['remote_cache_time_start'] remote_cache_count_delta = get_remote_cache_requests( ) - log_data['remote_cache_requests_start'] if 'remote_cache_requests_stopped' in log_data: # (now - restarted) + (stopped - start) = (now - start) + (stopped - restarted) remote_cache_time_delta += ( log_data['remote_cache_time_stopped'] - log_data['remote_cache_time_restarted']) remote_cache_count_delta += ( log_data['remote_cache_requests_stopped'] - log_data['remote_cache_requests_restarted']) if (remote_cache_time_delta > 0.005): remote_cache_output = " (mem: %s/%s)" % (format_timedelta( remote_cache_time_delta), remote_cache_count_delta) if not suppress_statsd: statsd.timing("%s.remote_cache.time" % (statsd_path, ), timedelta_ms(remote_cache_time_delta)) statsd.incr("%s.remote_cache.querycount" % (statsd_path, ), remote_cache_count_delta) startup_output = "" if 'startup_time_delta' in log_data and log_data[ "startup_time_delta"] > 0.005: startup_output = " (+start: %s)" % (format_timedelta( log_data["startup_time_delta"]), ) bugdown_output = "" if 'bugdown_time_start' in log_data: bugdown_time_delta = get_bugdown_time( ) - log_data['bugdown_time_start'] bugdown_count_delta = get_bugdown_requests( ) - log_data['bugdown_requests_start'] if 'bugdown_requests_stopped' in log_data: # (now - restarted) + (stopped - start) = (now - start) + (stopped - restarted) bugdown_time_delta += (log_data['bugdown_time_stopped'] - log_data['bugdown_time_restarted']) bugdown_count_delta += (log_data['bugdown_requests_stopped'] - log_data['bugdown_requests_restarted']) if (bugdown_time_delta > 0.005): bugdown_output = " (md: %s/%s)" % ( format_timedelta(bugdown_time_delta), bugdown_count_delta) if not suppress_statsd: statsd.timing("%s.markdown.time" % (statsd_path, ), timedelta_ms(bugdown_time_delta)) statsd.incr("%s.markdown.count" % (statsd_path, ), bugdown_count_delta) # Get the amount of time spent doing database queries db_time_output = "" queries = connection.connection.queries if connection.connection is not None else [] if len(queries) > 0: query_time = sum(float(query.get('time', 0)) for query in queries) db_time_output = " (db: %s/%sq)" % (format_timedelta(query_time), len(queries)) if not suppress_statsd: # Log ms, db ms, and num queries to statsd statsd.timing("%s.dbtime" % (statsd_path, ), timedelta_ms(query_time)) statsd.incr("%s.dbq" % (statsd_path, ), len(queries)) statsd.timing("%s.total" % (statsd_path, ), timedelta_ms(time_delta)) if 'extra' in log_data: extra_request_data = " %s" % (log_data['extra'], ) else: extra_request_data = "" logger_client = "(%s via %s)" % (requestor_for_logs, client_name) logger_timing = ('%5s%s%s%s%s%s %s' % (format_timedelta(time_delta), optional_orig_delta, remote_cache_output, bugdown_output, db_time_output, startup_output, path)) logger_line = ('%-15s %-7s %3d %s%s %s' % (remote_ip, method, status_code, logger_timing, extra_request_data, logger_client)) if (status_code in [200, 304] and method == "GET" and path.startswith("/static")): logger.debug(logger_line) else: logger.info(logger_line) if (is_slow_query(time_delta, path)): queue_json_publish( "slow_queries", dict(query="%s (%s)" % (logger_line, requestor_for_logs))) if settings.PROFILE_ALL_REQUESTS: log_data["prof"].disable() profile_path = "/tmp/profile.data.%s.%s" % ( path.split("/")[-1], int(time_delta * 1000), ) log_data["prof"].dump_stats(profile_path) # Log some additional data whenever we return certain 40x errors if 400 <= status_code < 500 and status_code not in [401, 404, 405]: assert error_content_iter is not None error_content_list = list(error_content_iter) if not error_content_list: error_data = '' elif isinstance(error_content_list[0], str): error_data = ''.join(error_content_list) elif isinstance(error_content_list[0], bytes): error_data = repr(b''.join(error_content_list)) if len(error_data) > 200: error_data = "[content more than 200 characters]" logger.info('status=%3d, data=%s, uid=%s', status_code, error_data, requestor_for_logs)