import json

from flask import request  # `request.args` matches Flask's request API

import util


def get_probability():
    msg = request.args.get('message')
    if not msg:
        return json.dumps({'error': "no 'message' argument given"})
    prob = util.probability(msg)
    response = {'message': msg, 'p': prob}
    return json.dumps(response)
def main():
    try:
        exercise_file = open(util.relative_path("exercise_reports"), 'r+')
        ex_reports = json.loads(exercise_file.read())
    except IOError:
        exercise_file = open(util.relative_path("exercise_reports"), 'w')
        ex_reports = {"elapsed_time": 1,  # Filler value
                      "max_id": -1,
                      "last_time": 0}

    new_reports = get_errors(copy.deepcopy(ex_reports))
    period_len = new_reports["time_this_period"]

    for ex in new_reports:
        if ex in SPECIAL_VALUES:
            continue
        if ex in ex_reports and ex_reports[ex]["num_errors"] > 0:
            errors_this_period = new_reports[ex]["this_period"]
            mean, probability = util.probability(
                ex_reports[ex]["num_errors"], ex_reports["elapsed_time"],
                errors_this_period, period_len)
            print("%s] TOTAL %s/%ss; %s-: %s/%ss; m=%.3f p=%.3f"
                  % (time.strftime("%Y-%m-%d %H:%M:%S %Z"),
                     ex_reports[ex]["num_errors"], ex_reports["elapsed_time"],
                     ex_reports["last_time"], errors_this_period, period_len,
                     mean, probability))
            if probability > 0.997 and errors_this_period > 1:
                util.send_to_slack(
                    "*Elevated exercise bug report rate in exercise `%s`*\n"
                    "Reports: %s. We saw %s in the last %s minutes,"
                    " while the mean indicates we should see around %s."
                    " *Probability that this is abnormally elevated: %.4f.*"
                    % (ex, generate_slack_links(new_reports[ex]["href"]),
                       util.thousand_commas(errors_this_period),
                       util.thousand_commas(int(period_len / 60)),
                       util.thousand_commas(round(mean, 2)), probability),
                    channel="#support")
        if "href" in new_reports[ex]:
            del new_reports[ex]["href"]  # don't need to keep the links around

    del new_reports["time_this_period"]

    # Overwrite with new contents
    exercise_file.seek(0)
    exercise_file.truncate()
    exercise_file.write(json.dumps(new_reports))
    exercise_file.close()
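# Every monitor in this collection calls util.probability(historical_count,
# historical_elapsed, count_this_period, period_len) and unpacks a
# (mean, probability) pair, where values near 1.0 trigger alerts.  util's
# implementation isn't shown here; the following is a minimal sketch of one
# plausible implementation, assuming a Poisson model in which `probability`
# is the chance of seeing strictly fewer than the observed count.  The name
# and signature match the call sites; the Poisson assumption is mine.

from scipy import stats


def probability(old_count, old_elapsed, new_count, new_elapsed):
    """Return (mean, probability) for the new observation window.

    mean: expected number of events in the new window, based on the
        historical rate old_count / old_elapsed.
    probability: P(X < new_count) under Poisson(mean); close to 1.0 when
        new_count is abnormally elevated relative to the historical rate.
    """
    rate = float(old_count) / old_elapsed
    mean = rate * new_elapsed
    # P(X <= new_count - 1) is P(X < new_count).
    prob = stats.poisson.cdf(new_count - 1, mean)
    return mean, prob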
def main():
    try:
        exercise_file = open(util.relative_path("exercise_reports"), 'r+')
        ex_reports = json.loads(exercise_file.read())
    except IOError:
        exercise_file = open(util.relative_path("exercise_reports"), 'w')
        ex_reports = {"elapsed_time": 1,  # Filler value
                      "max_id": -1,
                      "last_time": 0}

    new_reports = get_errors(copy.deepcopy(ex_reports))
    period_len = new_reports["time_this_period"]

    for ex in new_reports:
        if ex in SPECIAL_VALUES:
            continue
        if ex in ex_reports and ex_reports[ex]["num_errors"] > 0:
            errors_this_period = new_reports[ex]["this_period"]
            mean, probability = util.probability(
                ex_reports[ex]["num_errors"], ex_reports["elapsed_time"],
                errors_this_period, period_len)
            if probability > 0.997 and errors_this_period > 1:
                # Too many errors!
                hipchat_message.send_message(
                    "Elevated exercise bug report rate in exercise %s!"
                    " Reports: %s. We saw %s in the last %s minutes,"
                    " while the mean indicates we should see around %s."
                    " Probability that this is abnormally elevated: %.4f."
                    % (ex, generate_links(new_reports[ex]["href"]),
                       util.thousand_commas(errors_this_period),
                       util.thousand_commas(int(period_len / 60)),
                       util.thousand_commas(round(mean, 2)), probability),
                    room_id="Exercises")
        if "href" in new_reports[ex]:
            del new_reports[ex]["href"]  # don't need to keep the links around

    del new_reports["time_this_period"]

    # Overwrite with new contents
    exercise_file.seek(0)
    exercise_file.truncate()
    exercise_file.write(json.dumps(new_reports))
    exercise_file.close()
def main():
    try:
        google_code_file = open(util.relative_path("google_code"), 'r+')
        old_reports = json.loads(google_code_file.read())
    except IOError:
        google_code_file = open(util.relative_path("google_code"), 'w')
        # elapsed_time is filler value: doesn't matter what it is
        # since issue_count is 0.
        old_reports = {"elapsed_time": 1,
                       "last_id": -1,
                       "issue_count": 0,
                       "last_time": 0}

    new_reports = get_errors(copy.deepcopy(old_reports))
    time_this_period = new_reports["time_this_period"]

    mean, probability = util.probability(old_reports["issue_count"],
                                         old_reports["elapsed_time"],
                                         new_reports["issues_this_period"],
                                         time_this_period)

    if mean != 0 and probability > 0.99:
        # Too many errors!
        hipchat_message.send_message(
            "Elevated bug report rate on"
            " <a href='http://khanacademy.org/r/bugs'>Google code!</a>"
            " We saw %s in the last %s minutes,"
            " while the mean indicates we should see around %s."
            " Probability that this is abnormally elevated: %.4f."
            % (util.thousand_commas(new_reports["issues_this_period"]),
               util.thousand_commas(int(time_this_period / 60)),
               util.thousand_commas(round(mean, 2)),
               probability))

    # Delete fields we don't need anymore
    del new_reports["issues_this_period"]
    del new_reports["time_this_period"]

    google_code_file.seek(0)
    google_code_file.truncate()
    google_code_file.write(json.dumps(new_reports))
    google_code_file.close()
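# util.thousand_commas is used throughout these monitors to format counts in
# human-readable alert text.  Its implementation isn't shown; a one-line
# sketch of what it presumably does:


def thousand_commas(n):
    """1234567 -> '1,234,567'; floats keep their decimals, e.g. '1,234.56'."""
    return '{:,}'.format(n)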
def main():
    try:
        zendesk_status_file = util.relative_path("zendesk")
        with open(zendesk_status_file) as f:
            old_data = cPickle.load(f)
    except (IOError, EOFError):
        old_data = {"elapsed_time_weekday": 0.0001,  # avoid a divide-by-0
                    "elapsed_time_weekend": 0.0001,  # avoid a divide-by-0
                    "ticket_count_weekday": 0,
                    "ticket_count_weekend": 0,
                    "last_time_t": None,
                    "last_time_t_weekday": None,
                    "last_time_t_weekend": None,
                    }

    # We compare the number of tickets in the last few minutes against
    # the historical average for all time.  But we don't start "all
    # time" at AD 1, we start it a week ago.  Longer than that and it
    # takes forever due to quota issues.  That's still plenty of
    # historical data. :-)
    #
    # Zendesk seems to wait 5 minutes to update API data :-(, so we
    # ask for data that's a bit time-lagged.
    end_time = int(time.time()) - 300
    start_time = old_data['last_time_t']

    # Set a flag to track whether the current time period is off-hours.
    # Separate ticket_count/elapsed_time stats are kept for off-hours vs.
    # on-hours to improve sensitivity to increases during low-traffic periods.
    is_off_hours = _is_off_hours(datetime.datetime.fromtimestamp(end_time))

    (new_tickets, oldest_ticket_time_t) = get_tickets_between(
        start_time or (end_time - 86400 * 7), end_time)
    num_new_tickets = len(new_tickets)

    # The first time we run this, we take the starting time to be the
    # time of the first bug report.
    if start_time is None:
        start_time = oldest_ticket_time_t

    time_this_period = end_time - start_time

    if is_off_hours:
        # To simplify backcompat we still use "weekend" and "weekday" in the
        # saved data; really they mean "off hours" and "on hours" now.
        ticket_count = old_data['ticket_count_weekend']
        elapsed_time = old_data['elapsed_time_weekend']
    else:
        ticket_count = old_data['ticket_count_weekday']
        elapsed_time = old_data['elapsed_time_weekday']

    (mean, probability) = util.probability(ticket_count, elapsed_time,
                                           num_new_tickets, time_this_period)

    print("%s] TOTAL %s/%ss; %s-: %s/%ss; m=%.3f p=%.3f"
          % (time.strftime("%Y-%m-%d %H:%M:%S %Z"),
             ticket_count, int(elapsed_time),
             start_time, num_new_tickets, time_this_period,
             mean, probability))

    handle_alerts(new_tickets, time_this_period, mean, probability,
                  start_time, end_time)

    if is_off_hours:
        new_data = {"elapsed_time_weekend": (
                        old_data["elapsed_time_weekend"] + time_this_period),
                    "ticket_count_weekend": (
                        old_data["ticket_count_weekend"] + num_new_tickets),
                    "elapsed_time_weekday": old_data["elapsed_time_weekday"],
                    "ticket_count_weekday": old_data["ticket_count_weekday"],
                    }
    else:
        new_data = {"elapsed_time_weekend": old_data["elapsed_time_weekend"],
                    "ticket_count_weekend": old_data["ticket_count_weekend"],
                    "elapsed_time_weekday": (
                        old_data["elapsed_time_weekday"] + time_this_period),
                    "ticket_count_weekday": (
                        old_data["ticket_count_weekday"] + num_new_tickets),
                    }
    new_data['last_time_t'] = end_time

    with open(zendesk_status_file, 'w') as f:
        cPickle.dump(new_data, f)
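# _is_off_hours isn't defined in the snippet above.  Per its comments, the
# "weekend"/"weekday" buckets really mean "off hours"/"on hours".  A hedged
# sketch, assuming off-hours means weekends plus nights outside 08:00-17:00
# local time (the exact cutoffs are my assumption, not the original's):

import datetime


def _is_off_hours(dt):
    """Return True if `dt` falls outside normal support hours."""
    if dt.weekday() in (5, 6):      # Saturday or Sunday
        return True
    return not (8 <= dt.hour < 17)  # nights count as off-hours too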
import json
from sys import stdin, stdout

import util

THRESH = 0.05

# Repeat indefinitely.
while 1:
    # Capture the input from stdin, which is the stream of data from
    # ingest.py.  It is dumped as JSON, so decode it.
    line = stdin.readline()
    edit = json.loads(line)

    # We are extracting the 'wiki' key, which is a unique identifier for
    # the Wikipedia that was edited.
    message = edit.get('wiki')

    # I have written a function in util that gets the probability of a
    # particular message, given the entries in the Redis database.
    prob = util.probability(message)

    # If the probability falls below our threshold, emit a message.
    # Otherwise, loop around.
    if prob < THRESH:
        # This schema (particularly the 'unlikely_message' type) is
        # understood by the slack.py file, which sends the appropriate
        # alerts.
        alert = {
            'type': 'unlikely_message',
            'message': message,
            'prob': prob
        }
        # Print the alert to stdout and flush stdout to prevent message
        # delay from buffering.
        print(json.dumps(alert))
        stdout.flush()
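# The comment above says util.probability(message) derives a probability from
# entries in a Redis database.  A minimal sketch under that assumption; the
# hash name 'message_counts' and the +1 smoothing are my inventions, not the
# original helper's:

import redis

_r = redis.StrictRedis()


def probability(message):
    """Fraction of all observed messages equal to `message`, smoothed."""
    count = int(_r.hget('message_counts', message) or 0)
    total = sum(int(v) for v in _r.hvals('message_counts'))
    return (count + 1.0) / (total + 1.0)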
n_PC) + 'PC/control/'
name_out = mode + '_control_' + 'traj_' + str(traj_n)
plt.savefig(path_out + name_out + ".png")
plt.show()

########## Retrieve maximum number of elements based on a raw decimal discretization ##########
data = mPE_vector.flatten()
kmeans = KMeans(n_clusters=20).fit(data.reshape(-1, 1))
kmeans.predict(data.reshape(-1, 1))
centroids = kmeans.cluster_centers_
centroids = [centroids[i] for i in range(len(centroids))]
centroids = np.asarray(centroids)

########## Get probability vectors ##########
prob1 = probability(mPE_vector[0, :, 0], centroids)
prob2 = probability(mPE_vector[1, :, 0], centroids)

########## Evaluation ##########
significance_lev = significance_level
js_distance = distance.jensenshannon(prob1, prob2)
[_, p_value] = stats.ks_2samp(mPE_vector[0, :, 0], mPE_vector[1, :, 0])
if p_value > 0.8:
    too_low = False
    break
measures[1, 0] = p_value
measures[2, 0] = js_distance[0]
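# probability(values, centroids) isn't defined in the fragment above.  Given
# that its outputs feed a Jensen-Shannon distance, it presumably turns a
# sample into a discrete distribution over the k-means bins.  A sketch under
# that assumption (the exact array shapes aren't recoverable from the
# fragment, so this returns a flat 1-D probability vector):

import numpy as np


def probability(values, centroids):
    """Empirical probability vector of `values` over nearest-centroid bins."""
    values = np.asarray(values).reshape(-1, 1)
    centroids = np.asarray(centroids).reshape(1, -1)
    nearest = np.abs(values - centroids).argmin(axis=1)  # bin index per value
    counts = np.bincount(nearest, minlength=centroids.size)
    return counts / float(counts.sum())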
def main():
    try:
        jira_status_file = util.relative_path('jira')
        with open(jira_status_file) as f:
            old_data = cPickle.load(f)
    except IOError:
        old_data = {'elapsed_times': {},
                    'ticket_counts': collections.defaultdict(int),
                    'last_time_t': None,
                    }

    # We compare the number of tickets in the last few minutes against
    # the historical average for all time.  But we don't start "all
    # time" at AD 1, we start it 100 days ago.
    # Note: this is a way wider window than we use for Zendesk, but we're
    # making exercise-specific recommendations, so we need more data.
    now = int(time.time())
    num_days_in_past = 100
    (num_new_tickets, oldest_ticket_time_t) = num_tickets_between(
        old_data['last_time_t'] or (now - 86400 * num_days_in_past), now)

    # Elapsed time is computed per-exercise, so store values as we go.
    # We use a copy so that exercises that don't appear as new tickets
    # still have their old elapsed times preserved.
    elapsed_times = copy.copy(old_data['elapsed_times'])

    for exercise in num_new_tickets:
        # If this is the first time we're running, we don't have a
        # last_time_t, so we take the oldest ticket for each exercise as
        # its last_time_t.
        last_time_t = (old_data['last_time_t'] or
                       oldest_ticket_time_t[exercise])
        time_this_period = now - last_time_t
        # Avoid divide-by-0 if this is the first time we've seen an exercise.
        time_last_period = old_data['elapsed_times'].get(exercise, 0.0001)
        num_old_tickets_for_exercise = old_data['ticket_counts'][exercise]
        num_new_tickets_for_exercise = num_new_tickets[exercise]

        (mean, probability) = util.probability(num_old_tickets_for_exercise,
                                               time_last_period,
                                               num_new_tickets_for_exercise,
                                               time_this_period)

        print('%s] %s TOTAL %s/%ss; %s-: %s/%ss; m=%.3f p=%.3f'
              % (time.strftime('%Y-%m-%d %H:%M:%S %Z'), exercise,
                 num_old_tickets_for_exercise, int(time_last_period),
                 last_time_t, num_new_tickets_for_exercise,
                 time_this_period, mean, probability))

        if (mean != 0 and probability > 0.9995
                and num_new_tickets_for_exercise > THRESHOLD):
            quoted = urllib.quote(exercise.encode("utf-8"))
            ka_url = "https://khanacademy.org/e/%s" % quoted
            jira_url = ("https://khanacademy.atlassian.net/browse/"
                        "AI-941528?jql=Exercise%%20%%3D%%20%s" % quoted)
            util.send_to_slack(
                "*Elevated bug report rate on exercise `%s`*\n"
                "We saw %s in the last %s minutes,"
                " while the mean indicates we should see around %s."
                " *Probability that this is abnormally elevated: %.4f.*\n"
                " Links: <%s|exercise on Khan Academy>, <%s|JIRA tickets>."
                % (exercise,
                   util.thousand_commas(num_new_tickets_for_exercise),
                   util.thousand_commas(int(time_this_period / 60)),
                   util.thousand_commas(round(mean, 2)),
                   probability, ka_url, jira_url),
                channel='#content-beep-boop')

        elapsed_times[exercise] = time_last_period + time_this_period

    new_ticket_counts = util.merge_int_dicts(old_data['ticket_counts'],
                                             num_new_tickets)
    new_data = {'elapsed_times': elapsed_times,
                'ticket_counts': new_ticket_counts,
                'last_time_t': now,
                }

    with open(jira_status_file, 'w') as f:
        cPickle.dump(new_data, f)
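# util.merge_int_dicts isn't shown.  From its use above it merges two dicts
# of counts by summing values per key; returning a defaultdict(int) keeps it
# compatible with how 'ticket_counts' is initialized.  A sketch consistent
# with that use:

import collections


def merge_int_dicts(a, b):
    """Return a defaultdict(int) with the per-key sums of a and b."""
    merged = collections.defaultdict(int)
    for d in (a, b):
        for key, count in d.items():
            merged[key] += count
    return merged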
def main():
    try:
        zendesk_status_file = util.relative_path("zendesk")
        with open(zendesk_status_file) as f:
            old_data = cPickle.load(f)
    except IOError:
        old_data = {"elapsed_time": 0.0001,  # avoid a divide-by-0
                    "ticket_count": 0,
                    "last_time_t": None,
                    }

    # We compare the number of tickets in the last few minutes against
    # the historical average for all time.  But we don't start "all
    # time" at AD 1, we start it a week ago.  Longer than that and it
    # takes forever due to quota issues.  That's still plenty of
    # historical data. :-)
    #
    # Zendesk seems to wait 5 minutes to update API data :-(, so we
    # ask for data that's a bit time-lagged.
    end_time = int(time.time()) - 300
    start_time = old_data['last_time_t']

    # Set a flag to track whether the current time period is a weekend.
    # Separate ticket_count/elapsed_time stats are kept for weekend vs.
    # weekday to improve sensitivity to increases during low-traffic periods.
    is_weekend = time.localtime().tm_wday in [5, 6]

    (num_new_tickets, oldest_ticket_time_t) = num_tickets_between(
        start_time or (end_time - 86400 * 7), end_time)

    # The first time we run this, we take the starting time to be the
    # time of the first bug report.
    if start_time is None:
        start_time = oldest_ticket_time_t

    time_this_period = end_time - start_time

    # To handle the transition from unsegmented to segmented data, the code
    # below sets the weekend data to mirror the stats from the past 4 months
    # of logs to calculate a mean, and shifts all historical data to the
    # weekday data points.  This will result in some inaccuracy, but the
    # weekend data should skew the weekday data only negligibly.  May cause
    # some skewed alerting during the transition period.
    # TODO(jacqueline): Remove this transition code after August 2017.
    if 'elapsed_time' in old_data:
        old_data['ticket_count_weekday'] = old_data['ticket_count']
        old_data['ticket_count_weekend'] = 555
        old_data['elapsed_time_weekday'] = old_data['elapsed_time']
        old_data['elapsed_time_weekend'] = 2921756.0001

    if is_weekend:
        ticket_count = old_data['ticket_count_weekend']
        elapsed_time = old_data['elapsed_time_weekend']
    else:
        ticket_count = old_data['ticket_count_weekday']
        elapsed_time = old_data['elapsed_time_weekday']

    (mean, probability) = util.probability(ticket_count, elapsed_time,
                                           num_new_tickets, time_this_period)

    print("%s] TOTAL %s/%ss; %s-: %s/%ss; m=%.3f p=%.3f"
          % (time.strftime("%Y-%m-%d %H:%M:%S %Z"),
             ticket_count, int(elapsed_time),
             start_time, num_new_tickets, time_this_period,
             mean, probability))

    handle_alerts(num_new_tickets, time_this_period, mean, probability,
                  start_time, end_time)

    if is_weekend:
        new_data = {"elapsed_time_weekend": (
                        old_data["elapsed_time_weekend"] + time_this_period),
                    "ticket_count_weekend": (
                        old_data["ticket_count_weekend"] + num_new_tickets),
                    "elapsed_time_weekday": old_data["elapsed_time_weekday"],
                    "ticket_count_weekday": old_data["ticket_count_weekday"],
                    }
    else:
        new_data = {"elapsed_time_weekend": old_data["elapsed_time_weekend"],
                    "ticket_count_weekend": old_data["ticket_count_weekend"],
                    "elapsed_time_weekday": (
                        old_data["elapsed_time_weekday"] + time_this_period),
                    "ticket_count_weekday": (
                        old_data["ticket_count_weekday"] + num_new_tickets),
                    }
    new_data['last_time_t'] = end_time

    with open(zendesk_status_file, 'w') as f:
        cPickle.dump(new_data, f)
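# handle_alerts isn't shown in either Zendesk snippet, and the two call it
# with slightly different first arguments (a ticket list vs. a count).
# Judging from the alerting code in the exercise and JIRA monitors, it
# presumably notifies a channel when the probability crosses a threshold.
# A minimal sketch assuming the count form used in the snippet above; the
# 0.997 cutoff mirrors the exercise monitor and is a guess here:


def handle_alerts(num_new_tickets, time_this_period, mean, probability,
                  start_time, end_time):
    """Send an alert if the new-ticket rate looks abnormally elevated."""
    if mean != 0 and probability > 0.997:
        util.send_to_slack(
            "*Elevated Zendesk ticket rate*\n"
            "We saw %s in the last %s minutes, while the mean indicates we"
            " should see around %s."
            " *Probability that this is abnormally elevated: %.4f.*"
            % (util.thousand_commas(num_new_tickets),
               util.thousand_commas(int(time_this_period / 60)),
               util.thousand_commas(round(mean, 2)), probability),
            channel="#support")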