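# Hedged reconstruction of the imports these functions rely on; only the
# usage below confirms them. Project-local helpers (inputDb, tokenize,
# WordDeltaCounter, SpamDetector, _email_results/email_results) and the _*
# constants are defined elsewhere in this package, so their import paths are
# not shown here.
import datetime as dt                # used by the older process_alerts below
import json
import re
import sys
from collections import defaultdict
from datetime import date, datetime, time, timedelta
from textwrap import dedent
from warnings import warn

import requests
from pytz import timezone as tz      # tz('US/Pacific').localize(...) implies pytz

# OperationalError's origin depends on the DB layer; SQLAlchemy is an
# assumption here.
from sqlalchemy.exc import OperationalError

# Module-level connection cache read by _aggregate() before first assignment.
_INPUT_DB = None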
def _aggregate(where, delta, is_base, now, old, new, comments=None):
    global _INPUT_DB
    _INPUT_DB = _INPUT_DB if _INPUT_DB else inputDb('input_mozilla_org_new')

    sql = '''
        SELECT
            description,
            id
        FROM feedback_response fr
        WHERE
            %s
            AND locale = "en-US"
            AND happy = 0
            AND (campaign IS NULL OR campaign = "")
            AND (source IS NULL OR source = "")
            AND (version NOT RLIKE "[^a.0-9]")
        ;
    ''' % where

    try:
        results = _INPUT_DB.execute_sql(sql, old=old, new=new, now=now)
    except OperationalError:
        #TODO(rrayborn): raise an alert instead of just warning.
        warn('Database timed out executing base sql.')
        # Return 0 rather than None so callers can still compare the result
        # against _MIN_DENOM_THRESHOLD.
        return 0

    total = 0
    for row in results:
        # Tokenize the row into delta and store comments for retrieval in
        # comments.
        (word_dict, value) = tokenize(row.description, input_id=row.id)
        if comments is not None:
            comments[row.id] = row.description
        if value > 0:
            for (key, word_set) in word_dict.iteritems():
                if (key is None) or not re.match(r'\S', key):
                    continue
                data_set = delta[key].base if is_base else delta[key].after
                data_set.insert(key=key, link=(row.id, value), meta=word_set)
            total += 1
    return total
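# A minimal usage sketch for _aggregate(), not part of the original module.
# The WHERE fragment and the literal timestamp below are illustrative
# assumptions; ":now", ":old", and ":new" are bound by execute_sql from the
# keyword arguments, with old in weeks and new in hours to match the
# INTERVAL units used by the callers below.
def _demo_aggregate():
    delta = defaultdict(WordDeltaCounter)
    comments = {}
    where = ('product = "firefox"'
             '\nAND created > DATE_SUB(:now, INTERVAL :old WEEK)'
             '\nAND created < DATE_SUB(:now, INTERVAL :new HOUR)')
    total = _aggregate(where, delta, True, '2014-07-01 00:00:00',
                       old=2, new=24, comments=comments)
    return total, delta, comments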
def process_alerts(product, now=None, old=_PAST_TIMEFRAME, new=None,
                   debug=False, debug_file=sys.stdout, send_email=True,
                   address=None):
    delta = defaultdict(WordDeltaCounter)

    # Resolve date.  (A default of datetime.now() in the signature would be
    # evaluated once at import time, so resolve it here instead.)
    if now is None:
        now = datetime.now()
    if not isinstance(now, datetime) and isinstance(now, date):
        now = datetime.combine(now, time(0, 0, 0))
    #if not isinstance(now, datetime):
    #    # I don't feel like checking this. It's not a likely exception.
    #    raise Exception('"now" must be of type datetime or date.')
    now_string = now.strftime('%Y-%m-%d %H:%M:%S')
    now = tz('US/Pacific').localize(now)

    # Product-related vars
    if product.lower() == 'desktop':
        new = new if new else _DESKTOP_TIMEFRAME
        where_product = ('product = "firefox"' +
                         '\nAND LEFT(platform,7) IN("Windows","OS X","Linux")')
        flavor = 'word-based'
        subject = 'Desktop Input Alert'
        address = address if address else _DESKTOP_EMAIL
    elif product.lower() == 'android':
        new = new if new else _ANDROID_TIMEFRAME
        where_product = 'product = "Firefox for Android"'
        flavor = 'android-word-based'
        subject = 'Android Input Alert'
        address = address if address else _ANDROID_EMAIL
    else:
        raise Exception('product must be "desktop" or "android".')

    # Resolve debug info
    if debug and not isinstance(debug_file, file):
        warn('Debug file should be type <file>, outputting to stdout.')
        debug_file = sys.stdout
    is_print = not debug or debug_file != sys.stdout
    if is_print:
        print 'Generating %s for %s' % (subject, now_string)

    # Retrieve old timeframe
    where = (where_product +
             '\nAND created > DATE_SUB(:now, INTERVAL :old WEEK)' +
             '\nAND created < DATE_SUB(:now, INTERVAL :new HOUR)')
    base_total = _aggregate(where, delta, True, now_string, old, new)

    # Retrieve new timeframe
    after_comments = {}
    where = (where_product +
             '\nAND created > DATE_SUB(:now, INTERVAL :new HOUR)' +
             '\nAND created < :now')
    after_total = _aggregate(where, delta, False, now_string, old, new,
                             comments=after_comments)

    if (after_total < _MIN_DENOM_THRESHOLD or
            base_total < _MIN_DENOM_THRESHOLD):
        warn('NOT ENOUGH FEEDBACK %d before and %d after'
             % (base_total, after_total))
        return

    # Generate alerts
    alerted_feedback = {}
    # Determine if we should alert for each word and add the alert feedback
    # to a dict for spam detection
    for (k, v) in delta.iteritems():
        v.set_thresholds(diff_pct=_DIFF_PCT_MIN, diff_abs=_DIFF_ABS_MIN)
        v.set_potentials(base=base_total, after=after_total)
        v.end_time = tz('UTC').normalize(now)
        if (v.is_significant and v.severity >= _ALERT_SEV_MIN and
                v.after.count >= _MIN_COUNT_THRESHOLD):
            for link_item in v.after.link_list:
                alerted_feedback[link_item[0]] = link_item[1]
            v.alert = True

    # Find spam
    test_spam = {x: after_comments[x] for x in alerted_feedback.keys()}
    spam = SpamDetector().check_entries_for_spam(test_spam)

    # Remove spam
    after_total -= len(spam.keys())
    for (k, v) in delta.iteritems():
        if v.alert:
            for s in spam.keys():
                # link_list holds (id, value) tuples, so test membership with
                # the full tuple rather than the bare id.
                if (s, alerted_feedback[s]) in v.after.link_list:
                    v.after.remove(link=(s, alerted_feedback[s]))
            v.alert = False

    # Reprocess alerts after removing spam
    has_alerts = False
    email_list = set()
    for (k, v) in delta.iteritems():
        v.set_potentials(base=base_total, after=after_total)
        if v.is_significant and v.after.count >= _MIN_COUNT_THRESHOLD:
            if v.severity >= _ALERT_SEV_MIN:
                if is_print:
                    print 'Emitting alert for %s' % v.after.sorted_metadata[0]
                v.emit(timeframe=new, flavor=flavor, debug=debug,
                       debug_file=debug_file)
                has_alerts = True
            if send_email and v.severity >= _EMAIL_SEV_MIN:
                email_list.add(v)

    if not has_alerts:
        # This is a super fishy but technically valid use case.
        # Might alert on this in the future.
        if is_print:
            print 'No alerts today'
        return

    # Now send an email, looking up each piece of feedback.
    if email_list:
        _email_results(email_list, subject, address, after_comments)
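# A hedged sketch (not in the original source) of a dry run: with debug=True,
# process_alerts() routes each would-be alert through the counter's
# log_to_csv() instead of POSTing it, and send_email=False suppresses the
# summary email.  The filename is an invented example.
def _demo_debug_run():
    with open('word_alerts_debug.csv', 'w') as fh:
        process_alerts('desktop', debug=True, debug_file=fh,
                       send_email=False)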
def emit(self, timeframe=None, flavor='unknown', debug=False,
         debug_file=sys.stdout):
    if debug:
        if isinstance(debug_file, file):
            self.log_to_csv(debug_file)
        else:
            warn('Debug file should be type <file>, outputting to stdout')
            self.log_to_csv(sys.stdout)
        return

    headers = {
        'content-type': 'application/json',
        'accept': 'application/json; indent=4',
        'Fjord-Authorization': 'Token ' + _ALERT_TOKEN,
    }

    timediff = timedelta(hours=timeframe)
    start_time = self.end_time - timediff

    link_list = list(self.after.link_list)
    link_list.sort(key=lambda x: (x[1], x[0]), reverse=True)
    link_list = link_list[:_MAX_ALERT_LINKS]
    links = []
    for link in link_list:
        links.append({
            'name': 'Input Link',
            'url': 'http://input.mozilla.org/dashboard/response/' +
                   str(link[0])
        })

    description = dedent('''
        Trending words: %s
        Before: %.2f/1000
        After: %.2f/1000
        Absolute Difference: %.2f %%age points
        Percent Difference: %.2f %%
        Total feedback in the past %d hours: %d
        ''' % (
            ', '.join(self.after.sorted_metadata),
            self.base_pct * 10,
            self.after_pct * 10,
            self.diff_abs * 10,
            self.diff_pct,
            timeframe,
            len(self.after.link_list)
        )).strip()

    payload = {
        'severity': self.severity,
        'summary': '%s is trending up by %.2f' %
                   (self.after.sorted_metadata[0], self.diff_pct),
        'description': description,
        'flavor': flavor,
        'emitter_name': 'input_word_alert',
        'emitter_version': _VERSION,
        'links': links,
        'start_time': start_time.isoformat(),
        'end_time': self.end_time.isoformat(),
    }

    resp = requests.post(
        'https://input.mozilla.org/api/v1/alerts/alert/',
        data=json.dumps(payload),
        headers=headers
    )
    if resp.status_code == 201:
        print 'All systems good. Submitted alert for %s' % \
            (self.after.sorted_metadata[0])
    else:
        print 'Failed to submit alert for %s' % \
            (self.after.sorted_metadata[0])
        print resp.json()['detail']
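# A small sketch (assumption: not part of the original module) of the link
# ordering emit() applies: (id, value) tuples sorted by value, then id,
# descending, truncated to _MAX_ALERT_LINKS before being turned into
# dashboard URLs.
def _demo_link_sort():
    link_list = [(101, 2), (102, 5), (103, 5), (104, 1)]
    link_list.sort(key=lambda x: (x[1], x[0]), reverse=True)
    # -> [(103, 5), (102, 5), (101, 2), (104, 1)]
    return link_list[:_MAX_ALERT_LINKS]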
def process_alerts(date=None, debug=False, debug_file=sys.stdout, email=True):
    input_db = inputDb('input_mozilla_org_new')

    delta = defaultdict(WordDeltaCounter)
    base_total = 0
    after_total = 0

    if date is None:
        date = dt.datetime.now()
    if isinstance(date, dt.datetime):
        pass
    elif isinstance(date, dt.date):
        date = dt.datetime.combine(date, dt.time(0, 0, 0))
    date_string = date.strftime('%Y-%m-%d %H:%M:%S')
    date = tz('US/Pacific').localize(date)

    if debug:
        if not isinstance(debug_file, file):
            warn("Debug file should be type <file>, outputting to stdout instead")
            debug_file = sys.stdout

    if not debug or debug_file != sys.stdout:
        print "Generating alerts for " + date_string

    old_data_sql = """
        SELECT
            description,
            MIN(id) AS id
        FROM feedback_response fr
        WHERE
            created > DATE_SUB(:now, INTERVAL :old WEEK)
            AND created < DATE_SUB(:now, INTERVAL :new HOUR)
            AND product LIKE 'firefox'
            AND locale = 'en-US'
            AND happy = 0
            AND (campaign IS NULL OR campaign = '')
            AND (source IS NULL OR source = '')
            AND (version NOT RLIKE '[^a.0-9]')
            AND (platform LIKE 'Windows%'
                 OR platform LIKE 'OS X'
                 OR platform LIKE 'Linux')
        GROUP BY 1
    """
    try:
        results = input_db.execute_sql(old_data_sql, old=_PAST_TIMEFRAME,
                                       new=_TIMEFRAME, now=date_string)
    except OperationalError:
        warn("Database timed out executing base sql.")
        #TODO: raise an alert instead of just printing.
        return

    for row in results:
        (word_dict, value) = tokenize(row.description)
        if value == 0:
            continue
        for (key, word_set) in word_dict.iteritems():
            if (key is None) or not re.match(r'\S', key):
                continue
            delta[key].base.insert(key=key, link=(row.id, value),
                                   meta=word_set)
        base_total += 1

    new_data_sql = """
        SELECT
            description,
            MIN(id) AS id
        FROM feedback_response fr
        WHERE
            created > DATE_SUB(:now, INTERVAL :new HOUR)
            AND created < :now
            AND product LIKE 'firefox'
            AND locale = 'en-US'
            AND happy = 0
            AND (campaign IS NULL OR campaign = '')
            AND (source IS NULL OR source = '')
            AND (version NOT RLIKE '[^a.0-9]')
            AND (platform LIKE 'Windows%'
                 OR platform LIKE 'OS X'
                 OR platform LIKE 'Linux')
        GROUP BY 1
    """
    try:
        results = input_db.execute_sql(new_data_sql, new=_TIMEFRAME,
                                       now=date_string)
    except OperationalError:
        warn("Database timed out executing after sql.")
        return

    for row in results:
        (word_dict, value) = tokenize(row.description)
        if value == 0:
            continue
        for (key, word_set) in word_dict.iteritems():
            if (key is None) or not re.match(r'\S', key):
                continue
            delta[key].after.insert(key=key, link=(row.id, value),
                                    meta=word_set)
        after_total += 1

    if (after_total < _MIN_DENOM_THRESHOLD or
            base_total < _MIN_DENOM_THRESHOLD):
        warn("NOT ENOUGH FEEDBACK %d before and %d after"
             % (base_total, after_total))
        return

    # Generate alerts
    alert_count = 0
    for (k, v) in delta.iteritems():
        v.set_thresholds(diff_pct=_DIFF_PCT_MIN, diff_abs=_DIFF_ABS_MIN)
        v.set_potentials(base=base_total, after=after_total)
        v.end_time = tz('UTC').normalize(date)
        if (v.is_significant and v.severity >= _ALERT_SEV_MIN and
                v.after.count >= _MIN_COUNT_THRESHOLD):
            alert_count += 1
            if not debug or debug_file != sys.stdout:
                print "Emitting alert for %s" % v.after.sorted_metadata[0]
            v.emit(debug=debug, debug_file=debug_file)

    if alert_count <= 0:
        # This is a super fishy but technically valid use case. I guess
        # leave it for now.
        print "No alerts today"

    # Now send an email, looking up each piece of feedback.
    if email:
        email_list = set()
        for (k, v) in delta.iteritems():
            v.set_thresholds(diff_pct=_DIFF_PCT_MIN, diff_abs=_DIFF_ABS_MIN)
            if (v.is_significant and v.severity >= _EMAIL_SEV_MIN and
                    v.after.count >= _MIN_COUNT_THRESHOLD):
                email_list.add(v)
        email_results(email_list)
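# Entry-point sketch (an assumption, not in the original source).  This
# invokes the date-based variant defined directly above; pass a dt.date or
# dt.datetime as date= to backfill a past window instead of "now".
if __name__ == '__main__':
    process_alerts(debug=False, email=True)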