def log_text(d, daystring):
    """Record one text log message and announce unfamiliar classifications.

    d -- message dict. This function reads 'level', 'classification' and
         'text' from it, plus 'hms' and 'occ' (presumably filled in by
         add_timestamps -- confirm against that helper).
    daystring -- string folded into the per-day occurrence cache key.
    """
    add_timestamps(d)

    level = d['level']
    level_initial = level[0].upper()
    streamlog("%s [%s] %r" % (d['hms'], level_initial, d['text']), verbose)

    classification = d['classification']
    logclass_key = "logclass-" + classification
    first_sighting = not g.hardcache.get(logclass_key)

    if first_sighting:
        # Mark the classification as seen so that later messages of the
        # same kind do not trigger another announcement.
        g.hardcache.set(logclass_key, True, 86400 * 90)

    if first_sighting and level != 'debug':
        body = "".join([
            "The code just generated a [%s] message.\n" % classification,
            "I don't remember ever seeing one of those before.\n",
            "\n",
            "It happened on: %s\n" % d['occ'],
            "The log level was: %s\n" % level,
            "The complete text was:\n",
            repr(d['text']),
        ])
        emailer.nerds_email(body, "reddit secretary")

    # Append this occurrence to the list kept for (day, level, classification).
    occ_key = "-".join(("logtext", daystring, level, classification))
    cached = g.hardcache.get(occ_key)
    occurrences = [] if cached is None else cached

    entry = {'occ': d['occ'], 'text': repr(d['text'])}
    limited_append(occurrences, entry)
    g.hardcache.set(occ_key, occurrences, 86400 * 7)
def log_text(d, daystring):
    """Store a text log entry and email about first-time classifications.

    d -- message dict. 'level', 'classification' and 'text' are read here,
         along with 'hms' and 'occ' (presumably added by add_timestamps --
         verify against that helper).
    daystring -- string that becomes part of the occurrence cache key.
    """
    add_timestamps(d)

    level = d['level']
    marker = level[0].upper()
    streamlog("%s [%s] %r" % (d['hms'], marker, d['text']), verbose)

    classification = d['classification']
    logclass_key = "logclass-" + classification
    never_seen = not g.hardcache.get(logclass_key)

    if never_seen:
        # Record the classification first, so the announcement below is
        # sent at most once while the cache entry lives.
        g.hardcache.set(logclass_key, True, 86400 * 90)

    if never_seen and level != 'debug':
        pieces = [
            "The code just generated a [%s] message.\n" % classification,
            "I don't remember ever seeing one of those before.\n",
            "\n",
            "It happened on: %s\n" % d['occ'],
            "The log level was: %s\n" % level,
            "The complete text was:\n",
            repr(d['text']),
        ]
        emailer.nerds_email("".join(pieces), "reddit secretary")

    # One cached list of occurrences per (day, level, classification).
    occ_key = "-".join(("logtext", daystring, level, classification))
    stored = g.hardcache.get(occ_key)
    occurrences = [] if stored is None else stored

    record = {'occ': d['occ'], 'text': repr(d['text'])}
    limited_append(occurrences, record)
    g.hardcache.set(occ_key, occurrences, 86400 * 7)
def log_exception(d, daystring):
    """Fingerprint an exception report, cache it, and email about new ones.

    d -- exception dict. 'exception_desc', 'exception_type' and 'traceback'
         (an iterable of (filename, lineno, funcname, text) tuples) are
         read here, along with 'hms' and 'occ' (presumably added by
         add_timestamps -- confirm against that helper).
    daystring -- string folded into the per-day error cache key.

    The fingerprint is a fixed label for a handful of recognised failure
    modes, otherwise an md5 of the exception type plus every
    (filename, funcname) pair in the traceback.
    """
    exc_desc = d['exception_desc']
    exc_type = d['exception_type']
    exc_str = "%s: %s" % (exc_type, exc_desc)

    add_timestamps(d)

    tb = []
    key_material = exc_type
    pretty_lines = []

    make_lock_seen = False
    cassandra_seen = False

    for tpl in d['traceback']:
        tb.append(tpl)
        filename, lineno, funcname, text = tpl

        if text is not None and text.startswith(("with g.make_lock(",
                                                 "with make_lock(")):
            make_lock_seen = True

        lowered = filename.lower()
        if '/cassandra/' in lowered or '/pycassa/' in lowered:
            cassandra_seen = True

        key_material += "%s %s " % (filename, funcname)
        pretty_lines.append("%s:%s: %s()" % (filename, lineno, funcname))
        pretty_lines.append(" %s" % text)

    if exc_desc.startswith("QueuePool limit of size"):
        fingerprint = "QueuePool_overflow"
    elif exc_desc.startswith("error 2 from memcached_get: HOSTNAME "):
        fingerprint = "memcache_suckitude"
    elif exc_type == "TimeoutExpired" and make_lock_seen:
        fingerprint = "make_lock_timeout"
    elif exc_desc.startswith("(OperationalError) FATAL: the database "
                             "system is in recovery mode"):
        fingerprint = "recovering_db"
    elif exc_desc.startswith("(OperationalError) could not connect "
                             "to server"):
        fingerprint = "unconnectable_db"
    elif exc_desc.startswith("(OperationalError) server closed the "
                             "connection unexpectedly"):
        fingerprint = "flaky_db_op"
    elif cassandra_seen:
        fingerprint = "something's wrong with cassandra"
    else:
        # NOTE(review): md5() is handed a text string here; that would need
        # an explicit encode on Python 3 -- confirm the target interpreter.
        fingerprint = md5(key_material).hexdigest()

    nickname_key = "error_nickname-" + fingerprint
    status_key = "error_status-" + fingerprint

    nickname = g.hardcache.get(nickname_key)

    if nickname is None:
        # First time this fingerprint shows up: name it and tell people.
        nickname = '"%s" Exception' % randword().capitalize()
        news = ("A new kind of thing just happened! " +
                "I'm going to call it a %s\n\n" % nickname)
        news += "Where and when: %s\n\n" % d['occ']
        news += "Traceback:\n"
        # Join the exception summary as its own line so it does not run
        # into the last traceback line.
        news += "\n".join(pretty_lines + [exc_str])
        news += "\n"

        emailer.nerds_email(news, "Exception Watcher")

        g.hardcache.set(nickname_key, nickname, 86400 * 365)
        g.hardcache.set(status_key, "new", 86400)

    if g.hardcache.get(status_key) == "fixed":
        # A supposedly fixed exception came back: reopen it and say so.
        g.hardcache.set(status_key, "new", 86400)
        news = "This was marked as fixed: %s\n" % nickname
        news += "But it just occurred, so I'm marking it new again."
        emailer.nerds_email(news, "Exception Watcher")

    err_key = "-".join(["error", daystring, fingerprint])

    existing = g.hardcache.get(err_key)

    if not existing:
        existing = dict(exception=exc_str, traceback=tb, occurrences=[])

    # setdefault covers cached records that have no 'times_seen' entry.
    existing.setdefault('times_seen', 0)
    existing['times_seen'] += 1

    limited_append(existing['occurrences'], d['occ'])

    g.hardcache.set(err_key, existing, 7 * 86400)

    streamlog("%s [X] %-70s" % (d['hms'], nickname), verbose)
def log_exception(d, daystring):
    """Fingerprint an exception report, cache it, and email about new ones.

    d -- exception dict. 'exception_desc', 'exception_type' and 'traceback'
         (an iterable of (filename, lineno, funcname, text) tuples) are
         read here, along with 'hms' and 'occ' (presumably added by
         add_timestamps -- confirm against that helper).
    daystring -- string folded into the per-day error cache key.

    The fingerprint is a fixed label for a handful of recognised failure
    modes, otherwise an md5 of the exception type plus every
    (filename, funcname) pair in the traceback.
    """
    exc_desc = d['exception_desc']
    exc_type = d['exception_type']
    exc_str = "%s: %s" % (exc_type, exc_desc)

    add_timestamps(d)

    tb = []
    key_material = exc_type
    pretty_lines = []

    make_lock_seen = False
    flaky_db_seen = False
    cassandra_seen = False

    for tpl in d['traceback']:
        tb.append(tpl)
        filename, lineno, funcname, text = tpl

        if text is None:
            pass
        elif text.startswith(("with g.make_lock(", "with make_lock(")):
            make_lock_seen = True
        elif text.startswith(
                "(ProgrammingError) server closed the connection"):
            flaky_db_seen = True

        if '/cassandra/' in filename:
            cassandra_seen = True

        key_material += "%s %s " % (filename, funcname)
        pretty_lines.append("%s:%s: %s()" % (filename, lineno, funcname))
        pretty_lines.append(" %s" % text)

    if exc_desc.startswith("QueuePool limit of size"):
        fingerprint = "QueuePool_overflow"
    elif exc_desc.startswith("error 2 from memcached_get: HOSTNAME "):
        fingerprint = "memcache_suckitude"
    elif exc_type == "TimeoutExpired" and make_lock_seen:
        fingerprint = "make_lock_timeout"
    elif exc_desc.startswith("(OperationalError) FATAL: the database "
                             "system is in recovery mode"):
        fingerprint = "recovering_db"
    elif exc_desc.startswith("(OperationalError) could not connect "
                             "to server"):
        fingerprint = "unconnectable_db"
    elif exc_desc.startswith("(OperationalError) server closed the "
                             "connection unexpectedly"):
        fingerprint = "flaky_db_op"
    elif exc_type == "ProgrammingError" and flaky_db_seen:
        fingerprint = "flaky_db_prog"
        # SQLAlchemy includes the entire query in the exception
        # description which can sometimes be gigantic, in the case of
        # SELECTs. Get rid of it by keeping only the text before the query.
        select_pos = exc_str.find("SELECT")
        if select_pos > 0:
            exc_str = exc_str[:select_pos]
    elif exc_type == "NoServerAvailable":
        fingerprint = "cassandra_suckitude"
    elif exc_type == "TimedOutException" and cassandra_seen:
        fingerprint = "cassandra_suckitude #2"
    else:
        # NOTE(review): md5() is handed a text string here; that would need
        # an explicit encode on Python 3 -- confirm the target interpreter.
        fingerprint = md5(key_material).hexdigest()

    nickname_key = "error_nickname-" + fingerprint
    status_key = "error_status-" + fingerprint

    nickname = g.hardcache.get(nickname_key)

    if nickname is None:
        # First time this fingerprint shows up: name it and tell people.
        nickname = '"%s" Exception' % randword().capitalize()
        news = ("A new kind of thing just happened! " +
                "I'm going to call it a %s\n\n" % nickname)
        news += "Where and when: %s\n\n" % d['occ']
        news += "Traceback:\n"
        # Join the exception summary as its own line so it does not run
        # into the last traceback line.
        news += "\n".join(pretty_lines + [exc_str])
        news += "\n"

        emailer.nerds_email(news, "Exception Watcher")

        g.hardcache.set(nickname_key, nickname, 86400 * 365)
        g.hardcache.set(status_key, "new", 86400)

    if g.hardcache.get(status_key) == "fixed":
        # A supposedly fixed exception came back: reopen it and say so.
        g.hardcache.set(status_key, "new", 86400)
        news = "This was marked as fixed: %s\n" % nickname
        news += "But it just occurred, so I'm marking it new again."
        emailer.nerds_email(news, "Exception Watcher")

    err_key = "-".join(["error", daystring, fingerprint])

    existing = g.hardcache.get(err_key)

    if not existing:
        existing = dict(exception=exc_str, traceback=tb, occurrences=[])

    limited_append(existing['occurrences'], d['occ'])

    g.hardcache.set(err_key, existing, 7 * 86400)

    streamlog("%s [X] %-70s" % (d['hms'], nickname), verbose)