Beispiel #1
0
def _aggregate(where, delta, is_base, now,
               old, new, comments = None):
    """Tokenize the feedback selected by ``where`` and tally it into ``delta``.

    Selects ``description`` and ``id`` from ``feedback_response`` using
    ``where`` plus a fixed set of filters (en-US locale, ``happy = 0``, empty
    campaign and source, version made only of ``a``, ``.`` and digits). Each
    description is tokenized and every usable token is inserted into the
    matching ``delta`` entry.

    Args:
        where:    SQL condition spliced verbatim into the WHERE clause. It is
                  string-interpolated, so it must be trusted, internally
                  built text.
        delta:    Mapping of token key to a counter object exposing ``base``
                  and ``after`` collections; indexed with ``delta[key]``.
        is_base:  True to insert into ``.base``, False to insert into
                  ``.after``.
        now:      Forwarded to ``execute_sql`` as the ``now`` keyword.
        old:      Forwarded to ``execute_sql`` as the ``old`` keyword.
        new:      Forwarded to ``execute_sql`` as the ``new`` keyword.
        comments: Optional dict; when given, it is filled with
                  ``{row.id: row.description}`` for every row returned,
                  including rows that are not counted.

    Returns:
        The number of rows whose tokenize value was positive, or None when
        the query raised OperationalError (a warning is issued in that case).
    """
    global _INPUT_DB
    # Lazily open the shared connection the first time it is needed.
    if not _INPUT_DB:
        _INPUT_DB = inputDb('input_mozilla_org_new')

    sql = '''
        SELECT
            description,
            id
        FROM feedback_response fr
        WHERE
            %s
            AND locale = "en-US"
            AND happy = 0
            AND (campaign IS NULL OR campaign = "")
            AND (source   IS NULL OR source   = "")
            AND (version NOT RLIKE "[^a.0-9]")
        ;
    ''' % where
    try:
        results = _INPUT_DB.execute_sql(sql, old = old, new = new,
                                       now = now)
    except (OperationalError):
        #TODO(rrayborn): raise an alert instead of just warning.
        warn('Database timed out executing base sql.')
        # Callers must be prepared for None here, not a count.
        return

    total = 0
    for row in results:
        # Tokenize the row into delta and store the comment for later
        # retrieval by the caller.
        (word_dict, value) = tokenize(row.description, input_id = row.id)
        if comments is not None:
            comments[row.id] = row.description
        if value > 0:
            for (key, word_set) in word_dict.iteritems():
                # Skip missing keys and keys that are empty or start with
                # whitespace. Raw string: '\S' is not a valid str escape.
                if (key is None) or not re.match(r'\S', key):
                    continue
                data_set = delta[key].base if is_base else delta[key].after
                data_set.insert(key = key, link = (row.id, value),
                                meta = word_set)
            total += 1
    return total
Beispiel #2
0
def _aggregate(where, delta, is_base, now, old, new, comments=None):
    """Run the feedback query for ``where`` and fold its tokens into ``delta``.

    The query returns ``description`` and ``id`` rows from
    ``feedback_response`` that satisfy ``where`` and the fixed filters in the
    SQL below. Every description is tokenized; rows with a positive tokenize
    value contribute their tokens to ``delta[key].base`` (when ``is_base`` is
    true) or ``delta[key].after`` (otherwise).

    Args:
        where: SQL condition interpolated verbatim into the WHERE clause;
            must be trusted, internally generated text.
        delta: Mapping indexed by token key whose values expose ``base`` and
            ``after`` collections with an ``insert`` method.
        is_base: Selects which collection receives the tokens.
        now: Passed to ``execute_sql`` as keyword ``now``.
        old: Passed to ``execute_sql`` as keyword ``old``.
        new: Passed to ``execute_sql`` as keyword ``new``.
        comments: Optional dict that receives ``row.id -> row.description``
            for every returned row, counted or not.

    Returns:
        Count of rows with a positive tokenize value, or None if the query
        raised OperationalError (after issuing a warning).
    """
    global _INPUT_DB
    # The connection is created on first use and then shared.
    if not _INPUT_DB:
        _INPUT_DB = inputDb('input_mozilla_org_new')

    sql = '''
        SELECT
            description,
            id
        FROM feedback_response fr
        WHERE
            %s
            AND locale = "en-US"
            AND happy = 0
            AND (campaign IS NULL OR campaign = "")
            AND (source   IS NULL OR source   = "")
            AND (version NOT RLIKE "[^a.0-9]")
        ;
    ''' % where
    try:
        results = _INPUT_DB.execute_sql(sql, old=old, new=new, now=now)
    except (OperationalError):
        #TODO(rrayborn): raise an alert instead of just warning.
        warn('Database timed out executing base sql.')
        # None (not 0) signals the timeout to the caller.
        return

    total = 0
    for row in results:
        # Tokenize the row into delta; keep the raw comment for retrieval.
        (word_dict, value) = tokenize(row.description, input_id=row.id)
        if comments is not None:
            comments[row.id] = row.description
        if value > 0:
            for (key, word_set) in word_dict.iteritems():
                # Ignore absent keys and keys that are empty or begin with
                # whitespace. The pattern is a raw string because '\S' is
                # not a valid escape in an ordinary string literal.
                if (key is None) or not re.match(r'\S', key):
                    continue
                data_set = delta[key].base if is_base else delta[key].after
                data_set.insert(key=key, link=(row.id, value), meta=word_set)
            total += 1
    return total
Beispiel #3
0
def process_alerts(product,
                   now = None, old = _PAST_TIMEFRAME, new = None,
                   debug = False, debug_file = sys.stdout,
                   send_email = True, address = None):
    """Find words trending in recent feedback, emit alerts and email them.

    Token counts from a baseline window (between ``old`` weeks and ``new``
    hours before ``now``) are compared with the last ``new`` hours. Words
    that are significant are checked for spam, the spam is removed, and the
    remaining significant words are emitted and optionally emailed.

    Args:
        product:    'desktop' or 'android' (case-insensitive).
        now:        datetime or date ending the recent window. A plain date
                    is promoted to midnight. Defaults to the current time,
                    taken when the function is called. The value is
                    localized to US/Pacific.
        old:        Baseline look-back, used as ``INTERVAL :old WEEK``.
        new:        Recent window, used as ``INTERVAL :new HOUR``. Falls back
                    to the per-product timeframe constant when falsy.
        debug:      Forwarded to each counter's ``emit``.
        debug_file: Forwarded to ``emit``. When ``debug`` is set and this is
                    not a ``file``, stdout is used instead (with a warning).
        send_email: When true, words at or above ``_EMAIL_SEV_MIN`` are
                    collected and passed to ``_email_results``.
        address:    Email address override; defaults per product.

    Returns:
        None.

    Raises:
        Exception: if ``product`` is neither 'desktop' nor 'android'.
    """
    delta = defaultdict(WordDeltaCounter)

    # Resolve date. The default is computed here rather than in the
    # signature: a signature default is evaluated once, at definition time,
    # and would go stale in a long-running process.
    if now is None:
        now = datetime.now()
    elif not isinstance(now, datetime) and isinstance(now, date):
        now = datetime.combine(now, time(0,0,0))
    # Other types are deliberately not validated; they fail at strftime.

    now_string = now.strftime('%Y-%m-%d %H:%M:%S')
    now = tz('US/Pacific').localize(now)

    # Product related vars
    if product.lower() == 'desktop':
        new           = new if new else _DESKTOP_TIMEFRAME
        where_product = ('product = "firefox"' +
                         '\nAND LEFT(platform,7) IN("Windows","OS X","Linux")')
        flavor        = 'word-based'
        subject       = 'Desktop Input Alert'
        address       = address if address else _DESKTOP_EMAIL
    elif product.lower() == 'android':
        new           = new if new else _ANDROID_TIMEFRAME
        where_product = 'product = "Firefox for Android"'
        flavor        = 'android-word-based'
        subject       = 'Android Input Alert'
        address       = address if address else _ANDROID_EMAIL
    else:
        raise Exception('product must be "desktop" or "android".')

    # Resolve debug info
    if debug and not isinstance(debug_file, file):
        warn('Debug file should be type <file>, outputting to stdout.')
        debug_file = sys.stdout
    # Progress messages are suppressed only when debug output itself goes to
    # stdout, so the two streams do not interleave.
    is_print = not debug or debug_file != sys.stdout

    if is_print:
        print('Generating %s for %s' % (subject, now_string))

    # Retrieve old timeframe
    where = (where_product +
             '\nAND created > DATE_SUB(:now, INTERVAL :old WEEK)' +
             '\nAND created < DATE_SUB(:now, INTERVAL :new HOUR)')
    base_total = _aggregate(where, delta, True, now_string, old, new)
    if base_total is None:
        # _aggregate already warned about the database timeout. Without this
        # guard the None would reach the '%d' formatting below and raise.
        return

    # Retrieve new timeframe
    after_comments = {}
    where = (where_product +
             '\nAND created > DATE_SUB(:now, INTERVAL :new HOUR)' +
             '\nAND created < :now')
    after_total = _aggregate(where, delta, False, now_string, old, new,
                             comments = after_comments)
    if after_total is None:
        return

    if (after_total < _MIN_DENOM_THRESHOLD or
                base_total < _MIN_DENOM_THRESHOLD):
        warn('NOT ENOUGH FEEDBACK %d before and %d after' % (base_total,
                                                             after_total))
        return

    #Generate alerts
    alerted_feedback = {}
    end_time = tz('UTC').normalize(now)

    # Determine if we should alert for each word and add the alert feedback to a
    # dict for spam detection
    for (k,v) in delta.iteritems():
        v.set_thresholds(diff_pct = _DIFF_PCT_MIN, diff_abs = _DIFF_ABS_MIN)
        v.set_potentials(base = base_total, after = after_total)
        v.end_time = end_time
        if (v.is_significant and v.severity >= _ALERT_SEV_MIN
                and v.after.count >= _MIN_COUNT_THRESHOLD):
            for link_item in v.after.link_list:
                alerted_feedback[link_item[0]] = link_item[1]
            v.alert = True

    # Find spam
    test_spam = { x: after_comments[x] for x in alerted_feedback }
    spam = SpamDetector().check_entries_for_spam(test_spam)
    # Remove spam
    after_total -= len(spam.keys())
    for (k,v) in delta.iteritems():
        if (v.alert):
            for s in spam.keys():
                # NOTE(review): the loop above reads link_list items as
                # (id, value) pairs, so a bare id may never be "in" it unless
                # link_list implements membership by id -- confirm.
                if s in v.after.link_list:
                    v.after.remove(link = (s, alerted_feedback[s]))
                    v.alert = False

    # Reprocess alerts while removing spam
    has_alerts = False

    email_list = set()
    for (k,v) in delta.iteritems():
        v.set_potentials(base = base_total, after = after_total)
        if v.is_significant and v.after.count >= _MIN_COUNT_THRESHOLD:
            if v.severity >= _ALERT_SEV_MIN:
                if is_print:
                    print('Emitting alert for %s' % v.after.sorted_metadata[0])
                v.emit(timeframe = new, flavor = flavor,
                       debug = debug, debug_file = debug_file)
                has_alerts = True
            if send_email and v.severity >= _EMAIL_SEV_MIN:
                email_list.add(v)

    if not has_alerts:
        # This is super fishy but technically valid usecase.
        # Might alert on this in the future
        if is_print:
            print('No alerts today')
        return

    # Now send an email, looking up each piece of feedback.
    if email_list:
        _email_results(email_list, subject, address, after_comments)
Beispiel #4
0
    def emit(self, timeframe = None, flavor = 'unknown',
             debug = False, debug_file = sys.stdout):
        """Submit this word's alert to the Input alerts API.

        In debug mode nothing is sent: the counter is written with
        ``log_to_csv`` to ``debug_file`` (or to stdout, with a warning, when
        ``debug_file`` is not a ``file``) and the method returns.

        Otherwise a JSON payload describing the trend is POSTed to the alerts
        endpoint and the outcome is printed.

        Args:
            timeframe:  Length of the alert window in hours. Required unless
                        ``debug`` is set; used to derive ``start_time`` from
                        ``self.end_time``.
            flavor:     Alert flavor string placed in the payload.
            debug:      When true, log to CSV instead of posting.
            debug_file: Destination for the CSV output in debug mode.

        Raises:
            TypeError: if ``timeframe`` is None and ``debug`` is false.
        """
        if debug:
            if isinstance(debug_file, file):
                self.log_to_csv(debug_file)
            else:
                warn('Debug file should be type <file>, outputting to stdout')
                self.log_to_csv(sys.stdout)
            return
        if timeframe is None:
            # timedelta(hours=None) would raise the same exception type with
            # a far less helpful message.
            raise TypeError('timeframe (hours) is required to emit an alert')
        headers = {
            'content-type': 'application/json',
            'accept': 'application/json; indent=4',
            'Fjord-Authorization': 'Token ' + _ALERT_TOKEN,
        }
        timediff = timedelta(hours = timeframe)
        start_time = self.end_time - timediff
        # Order links by their second element, then first, both descending,
        # and keep only the first _MAX_ALERT_LINKS.
        link_list = sorted(self.after.link_list,
                           key = lambda x:(x[1], x[0]), reverse=True)
        link_list = link_list[:_MAX_ALERT_LINKS]
        links = [
            {
                'name': 'Input Link',
                'url' : 'http://input.mozilla.org/dashboard/response/' +
                        str(link[0])
            }
            for link in link_list
        ]
        description = dedent('''

            Trending words: %s

            Before: %.2f/1000
            After %.2f/1000
            Absolute Difference: %.2f %%age points
            Percent Difference: %.2f %%
            Total feedback in the past %d hours: %d

        '''%(
            ', '.join(self.after.sorted_metadata),
            self.base_pct * 10,
            self.after_pct * 10,
            self.diff_abs * 10,
            self.diff_pct,
            timeframe,
            len(self.after.link_list)

        )).strip()
        payload = {
            'severity':         self.severity,
            'summary': '%s is trending up by %.2f'%\
                (self.after.sorted_metadata[0], self.diff_pct),
            'description':      description,
            'flavor':           flavor,
            'emitter_name':     'input_word_alert',
            'emitter_version':  _VERSION,
            'links':            links,
            'start_time':       start_time.isoformat(),
            'end_time':         self.end_time.isoformat()
        }
        resp = requests.post(
            'https://input.mozilla.org/api/v1/alerts/alert/',
            data=json.dumps(payload),
            headers=headers
        )
        if resp.status_code == 201:
            print('All systems good. Submitted alert for %s' %
                  (self.after.sorted_metadata[0]))
        else:
            print('Failed to submit alert for %s' %
                  (self.after.sorted_metadata[0]))
            # An error response is not guaranteed to be a JSON object with a
            # 'detail' key; fall back to the raw body so that reporting the
            # failure cannot itself crash.
            try:
                detail = resp.json()['detail']
            except (ValueError, KeyError, TypeError):
                detail = resp.text
            print(detail)
Beispiel #5
0
def process_alerts(product,
                   now=None,
                   old=_PAST_TIMEFRAME,
                   new=None,
                   debug=False,
                   debug_file=sys.stdout,
                   send_email=True,
                   address=None):
    """Detect trending feedback words for a product and raise alerts.

    Two windows are aggregated with ``_aggregate``: a baseline running from
    ``old`` weeks before ``now`` to ``new`` hours before ``now``, and the
    most recent ``new`` hours. Significant words are screened through
    ``SpamDetector``, spam links are removed, and the words that remain
    significant are emitted and optionally emailed.

    Args:
        product: 'desktop' or 'android', compared case-insensitively.
        now: datetime or date marking the end of the recent window; a plain
            date becomes midnight of that day. Defaults to the current time
            at call. Localized to US/Pacific.
        old: Baseline look-back, bound to ``INTERVAL :old WEEK``.
        new: Recent window, bound to ``INTERVAL :new HOUR``; a falsy value
            selects the per-product timeframe constant.
        debug: Passed through to each counter's ``emit``.
        debug_file: Passed through to ``emit``; replaced by stdout (with a
            warning) when ``debug`` is set and it is not a ``file``.
        send_email: Whether to collect words at or above ``_EMAIL_SEV_MIN``
            and hand them to ``_email_results``.
        address: Email address override; defaults per product.

    Returns:
        None.

    Raises:
        Exception: if ``product`` is not 'desktop' or 'android'.
    """
    delta = defaultdict(WordDeltaCounter)

    # Resolve date. "Now" is taken at call time; a default written in the
    # signature is evaluated only once, when the function is defined.
    if now is None:
        now = datetime.now()
    elif not isinstance(now, datetime) and isinstance(now, date):
        now = datetime.combine(now, time(0, 0, 0))
    # Any other type is not validated and fails at strftime below.

    now_string = now.strftime('%Y-%m-%d %H:%M:%S')
    now = tz('US/Pacific').localize(now)

    # Product related vars
    if product.lower() == 'desktop':
        new = new if new else _DESKTOP_TIMEFRAME
        where_product = ('product = "firefox"' +
                         '\nAND LEFT(platform,7) IN("Windows","OS X","Linux")')
        flavor = 'word-based'
        subject = 'Desktop Input Alert'
        address = address if address else _DESKTOP_EMAIL
    elif product.lower() == 'android':
        new = new if new else _ANDROID_TIMEFRAME
        where_product = 'product = "Firefox for Android"'
        flavor = 'android-word-based'
        subject = 'Android Input Alert'
        address = address if address else _ANDROID_EMAIL
    else:
        raise Exception('product must be "desktop" or "android".')

    # Resolve debug info
    if debug and not isinstance(debug_file, file):
        warn('Debug file should be type <file>, outputting to stdout.')
        debug_file = sys.stdout
    # Status lines are printed unless debug output is itself using stdout.
    is_print = not debug or debug_file != sys.stdout

    if is_print:
        print('Generating %s for %s' % (subject, now_string))

    # Retrieve old timeframe
    where = (where_product +
             '\nAND created > DATE_SUB(:now, INTERVAL :old WEEK)' +
             '\nAND created < DATE_SUB(:now, INTERVAL :new HOUR)')
    base_total = _aggregate(where, delta, True, now_string, old, new)
    if base_total is None:
        # The query timed out and _aggregate has warned already. Stopping
        # here keeps None away from the '%d' formatting further down.
        return

    # Retrieve new timeframe
    after_comments = {}
    where = (where_product +
             '\nAND created > DATE_SUB(:now, INTERVAL :new HOUR)' +
             '\nAND created < :now')
    after_total = _aggregate(where,
                             delta,
                             False,
                             now_string,
                             old,
                             new,
                             comments=after_comments)
    if after_total is None:
        return

    if (after_total < _MIN_DENOM_THRESHOLD
            or base_total < _MIN_DENOM_THRESHOLD):
        warn('NOT ENOUGH FEEDBACK %d before and %d after' %
             (base_total, after_total))
        return

    #Generate alerts
    alerted_feedback = {}
    end_time = tz('UTC').normalize(now)

    # Determine if we should alert for each word and add the alert feedback to a
    # dict for spam detection
    for (k, v) in delta.iteritems():
        v.set_thresholds(diff_pct=_DIFF_PCT_MIN, diff_abs=_DIFF_ABS_MIN)
        v.set_potentials(base=base_total, after=after_total)
        v.end_time = end_time
        if (v.is_significant and v.severity >= _ALERT_SEV_MIN
                and v.after.count >= _MIN_COUNT_THRESHOLD):
            for link_item in v.after.link_list:
                alerted_feedback[link_item[0]] = link_item[1]
            v.alert = True

    # Find spam
    test_spam = {x: after_comments[x] for x in alerted_feedback}
    spam = SpamDetector().check_entries_for_spam(test_spam)
    # Remove spam
    after_total -= len(spam.keys())
    for (k, v) in delta.iteritems():
        if (v.alert):
            for s in spam.keys():
                # NOTE(review): link_list items are indexed as (id, value)
                # pairs above, so this membership test on a bare id may
                # never succeed unless link_list matches by id -- confirm.
                if s in v.after.link_list:
                    v.after.remove(link=(s, alerted_feedback[s]))
                    v.alert = False

    # Reprocess alerts while removing spam
    has_alerts = False

    email_list = set()
    for (k, v) in delta.iteritems():
        v.set_potentials(base=base_total, after=after_total)
        if v.is_significant and v.after.count >= _MIN_COUNT_THRESHOLD:
            if v.severity >= _ALERT_SEV_MIN:
                if is_print:
                    print('Emitting alert for %s' % v.after.sorted_metadata[0])
                v.emit(timeframe=new,
                       flavor=flavor,
                       debug=debug,
                       debug_file=debug_file)
                has_alerts = True
            if send_email and v.severity >= _EMAIL_SEV_MIN:
                email_list.add(v)

    if not has_alerts:
        # This is super fishy but technically valid usecase.
        # Might alert on this in the future
        if is_print:
            print('No alerts today')
        return

    # Now send an email, looking up each piece of feedback.
    if email_list:
        _email_results(email_list, subject, address, after_comments)
Beispiel #6
0
    def emit(self,
             timeframe=None,
             flavor='unknown',
             debug=False,
             debug_file=sys.stdout):
        """Post an alert for this word, or log it as CSV when debugging.

        With ``debug`` set, the counter is written via ``log_to_csv`` to
        ``debug_file`` (stdout plus a warning if that is not a ``file``) and
        no request is made. Otherwise a JSON payload is POSTed to the alerts
        endpoint and a success or failure line is printed.

        Args:
            timeframe: Alert window length in hours; required unless
                ``debug`` is set. ``start_time`` is ``self.end_time`` minus
                this many hours.
            flavor: Alert flavor string included in the payload.
            debug: Log to CSV instead of posting.
            debug_file: CSV destination used in debug mode.

        Raises:
            TypeError: if ``timeframe`` is None while ``debug`` is false.
        """
        if debug:
            if isinstance(debug_file, file):
                self.log_to_csv(debug_file)
            else:
                warn('Debug file should be type <file>, outputting to stdout')
                self.log_to_csv(sys.stdout)
            return
        if timeframe is None:
            # Same exception type timedelta(hours=None) would raise, but with
            # a message that names the actual problem.
            raise TypeError('timeframe (hours) is required to emit an alert')
        headers = {
            'content-type': 'application/json',
            'accept': 'application/json; indent=4',
            'Fjord-Authorization': 'Token ' + _ALERT_TOKEN,
        }
        timediff = timedelta(hours=timeframe)
        start_time = self.end_time - timediff
        # Sort links by (second element, first element) descending and keep
        # at most _MAX_ALERT_LINKS of them.
        link_list = sorted(self.after.link_list,
                           key=lambda x: (x[1], x[0]),
                           reverse=True)
        link_list = link_list[:_MAX_ALERT_LINKS]
        links = [
            {
                'name': 'Input Link',
                'url' : 'http://input.mozilla.org/dashboard/response/' +
                        str(link[0])
            }
            for link in link_list
        ]
        description = dedent('''

            Trending words: %s

            Before: %.2f/1000
            After %.2f/1000
            Absolute Difference: %.2f %%age points
            Percent Difference: %.2f %%
            Total feedback in the past %d hours: %d

        ''' % (', '.join(self.after.sorted_metadata), self.base_pct * 10,
               self.after_pct * 10, self.diff_abs * 10, self.diff_pct,
               timeframe, len(self.after.link_list))).strip()
        payload = {
            'severity':         self.severity,
            'summary': '%s is trending up by %.2f'%\
                (self.after.sorted_metadata[0], self.diff_pct),
            'description':      description,
            'flavor':           flavor,
            'emitter_name':     'input_word_alert',
            'emitter_version':  _VERSION,
            'links':            links,
            'start_time':       start_time.isoformat(),
            'end_time':         self.end_time.isoformat()
        }
        resp = requests.post('https://input.mozilla.org/api/v1/alerts/alert/',
                             data=json.dumps(payload),
                             headers=headers)
        if resp.status_code == 201:
            print('All systems good. Submitted alert for %s' %
                  (self.after.sorted_metadata[0]))
        else:
            print('Failed to submit alert for %s' %
                  (self.after.sorted_metadata[0]))
            # The error body may not be JSON, or may lack 'detail'; print
            # the raw text in that case instead of raising while reporting.
            try:
                detail = resp.json()['detail']
            except (ValueError, KeyError, TypeError):
                detail = resp.text
            print(detail)
Beispiel #7
0
def _tally_feedback(results, delta, side):
    """Tokenize each row and insert its tokens into ``delta[key].<side>``.

    ``side`` is the attribute name ('base' or 'after') of the collection to
    fill. Returns the number of rows whose tokenize value was non-zero.
    """
    total = 0
    for row in results:
        (word_dict, value) = tokenize(row.description)
        if value == 0:
            continue
        for (key, word_set) in word_dict.iteritems():
            # Skip missing keys and keys that are empty or start with
            # whitespace. Raw string: '\S' is not a valid str escape.
            if (key is None) or not re.match(r'\S', key):
                continue
            getattr(delta[key], side).insert(key = key, link = (row.id, value), meta = word_set)
        total += 1
    return total


def process_alerts(date = None, debug = False, debug_file = sys.stdout, email = True):
    """Generate word-trend alerts for desktop Firefox feedback.

    Counts tokens in a baseline window (``_PAST_TIMEFRAME`` weeks back up to
    ``_TIMEFRAME`` hours before ``date``) and in the last ``_TIMEFRAME``
    hours, emits an alert for every significant word at or above
    ``_ALERT_SEV_MIN``, and optionally emails the words at or above
    ``_EMAIL_SEV_MIN``.

    Args:
        date:       datetime or date ending the recent window; None means the
                    current time, and a plain date is promoted to midnight.
                    The value is localized to US/Pacific.
        debug:      Forwarded to each counter's ``emit``.
        debug_file: Forwarded to ``emit``; replaced by stdout (with a
                    warning) when ``debug`` is set and it is not a ``file``.
        email:      When true, ``email_results`` is called with the set of
                    qualifying words (possibly empty).

    Returns:
        None. Returns early, after a warning, when a query raises
        OperationalError or either window has too little feedback.
    """
    input_db = inputDb('input_mozilla_org_new')

    delta = defaultdict(WordDeltaCounter)

    if date is None:
        date = dt.datetime.now()
    elif not isinstance(date, dt.datetime) and isinstance(date, dt.date):
        date = dt.datetime.combine(date, dt.time(0,0,0))

    date_string = date.strftime('%Y-%m-%d %H:%M:%S')
    date = tz('US/Pacific').localize(date)

    if debug and not isinstance(debug_file, file):
        warn("Debug file should be type <file>, outputting to stdout instead")
        debug_file = sys.stdout

    # Status lines are printed unless debug output is itself using stdout.
    is_print = not debug or debug_file != sys.stdout

    if is_print:
        print("Generating alerts for " + date_string)

    old_data_sql = """
        SELECT description, MIN(id) as id
        FROM feedback_response fr
        WHERE
        created > DATE_SUB(:now, INTERVAL :old WEEK) AND
        created < DATE_SUB(:now, INTERVAL :new HOUR)
        AND product LIKE 'firefox'
        AND locale = 'en-US'
        AND happy = 0
        AND (campaign IS NULL or campaign = '')
        AND (source IS NULL or source = '')
        AND (version NOT RLIKE '[^a.0-9]')
        AND (platform LIKE 'Windows%' OR platform LIKE 'OS X' OR platform LIKE 'Linux')
        GROUP BY 1
    """
    try:
        results = input_db.execute_sql(old_data_sql, old=_PAST_TIMEFRAME, new=_TIMEFRAME, now = date_string)
    except (OperationalError):
        warn("Database timed out executing base sql.")
        #TODO: raise an alert instead of just printing.
        return

    base_total = _tally_feedback(results, delta, 'base')

    new_data_sql = """
        SELECT description, MIN(id) as id
        FROM feedback_response fr
        WHERE
        created > DATE_SUB(:now, INTERVAL :new HOUR) AND
        created < :now
        AND product LIKE 'firefox'
        AND locale = 'en-US'
        AND happy = 0
        AND (campaign IS NULL or campaign = '')
        AND (source IS NULL or source = '')
        AND (version NOT RLIKE '[^a.0-9]')
        AND (platform LIKE 'Windows%' OR platform LIKE 'OS X' OR platform LIKE 'Linux')
        GROUP BY 1
    """
    try:
        results = input_db.execute_sql(new_data_sql, new=_TIMEFRAME, now = date_string)
    except (OperationalError):
        warn("Database timed out executing after sql.")
        return

    after_total = _tally_feedback(results, delta, 'after')

    if (after_total < _MIN_DENOM_THRESHOLD or base_total < _MIN_DENOM_THRESHOLD):
        warn("NOT ENOUGH FEEDBACK %d before and %d after" % (base_total, after_total))
        return

    #Generate alerts

    alert_count = 0

    for (k,v) in delta.iteritems():
        v.set_thresholds(diff_pct = _DIFF_PCT_MIN, diff_abs = _DIFF_ABS_MIN)
        v.set_potentials(base = base_total, after = after_total)
        v.end_time = tz('UTC').normalize(date)
        if (v.is_significant and v.severity >= _ALERT_SEV_MIN
                and v.after.count >= _MIN_COUNT_THRESHOLD):
            alert_count += 1
            if is_print:
                print("Emitting alert for %s" % v.after.sorted_metadata[0])
            v.emit(debug = debug, debug_file = debug_file)

    if alert_count <= 0:
        # This is super fishy but technically a valid use case. Leave it for
        # now; note that the email step below still runs.
        print("No alerts today")

    # Now send an email, looking up each piece of feedback.
    if (email):
        email_list = set()

        for (k,v) in delta.iteritems():
            v.set_thresholds(diff_pct = _DIFF_PCT_MIN, diff_abs = _DIFF_ABS_MIN)
            if (v.is_significant and v.severity >= _EMAIL_SEV_MIN
                and v.after.count >= _MIN_COUNT_THRESHOLD):
                email_list.add(v)
        email_results(email_list)