예제 #1
0
def send_alerts(settings, db):
    """
    BLINDLY SENDS ALERTS FROM THE ALERTS TABLE, ASSUMING ALL HAVE THE SAME STRUCTURE.

    Picks at most ten alerts that have never been sent, are not obsolete, have
    no solution, belong to SEND_REASONS, were created after (now - LOOK_BACK)
    and whose combined severity/confidence exceeds ALERT_LIMIT - EPSILON.
    Each alert is rendered with the email subject/template stored on its
    reason, handed to the "mail.send" database procedure, and then stamped
    with last_sent.

    :param settings: configuration object; only settings.param.debug is read here
    :param db: database handle used for every query, procedure call and update
    :return: None
    Any exception raised while selecting or sending is passed to
    Log.error("Could not send alerts", ...).
    """
    debug = settings.param.debug
    db.debug = debug

    #TODO: REMOVE, LEAVE IN DB
    # IN DEBUG MODE THE IN-CODE SUBJECT/TEMPLATE OVERWRITE WHAT IS STORED FOR REASON
    if db.debug:
        db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
            "template": CNV.object2JSON(TEMPLATE),
            "subject": CNV.object2JSON(SUBJECT),
            "reason": REASON
        })
        db.flush()

    try:
        # NOTE(review): "last_sent" IS PASSED BELOW BUT THE QUERY TEXT HAS NO
        # {{last_sent}} PLACEHOLDER (IT ONLY TESTS a.last_sent IS NULL) - KEPT AS-IS
        new_alerts = db.query("""
            SELECT
                a.id alert_id,
                a.reason,
                r.description,
                a.details,
                a.severity,
                a.confidence,
                a.revision,
                r.email_template,
                r.email_subject
            FROM
                alerts a
            JOIN
                reasons r on r.code = a.reason
            WHERE
                a.last_sent IS NULL AND
                a.status <> 'obsolete' AND
                math.bayesian_add(a.severity, a.confidence) > {{alert_limit}} AND
                a.solution IS NULL AND
                a.reason in {{reasons}} AND
                a.create_time > {{min_time}}
            ORDER BY
                math.bayesian_add(a.severity, a.confidence) DESC,
                json.number(left(details, 65000), "diff_percent") DESC
            LIMIT
                10
        """, {
            "last_sent": datetime.utcnow() - RESEND_AFTER,
            "alert_limit": ALERT_LIMIT - EPSILON,
            "min_time": datetime.utcnow()-LOOK_BACK,
            "reasons": SQL("("+", ".join(db.quote_value(v) for v in SEND_REASONS)+")")
        })

        if not new_alerts:
            if debug:
                Log.note("Nothing important to email")
            return

        for alert in new_alerts:
            #poor souls that signed up for emails
            listeners = ";".join(db.query("SELECT email FROM listeners WHERE reason={{reason}}", {"reason": alert.reason}).email)

            body = [HEADER]
            # PRESUMABLY KEEPS THE log() ARGUMENT BELOW AWAY FROM ZERO
            if alert.confidence >= 1:
                alert.confidence = 0.999999

            alert.details = CNV.JSON2object(alert.details)
            try:
                alert.revision = CNV.JSON2object(alert.revision)
            except Exception:
                # DELIBERATE BEST-EFFORT: WHEN revision DOES NOT PARSE AS JSON
                # THE RAW VALUE IS KEPT UNCHANGED
                pass
            alert.score = str(-log(1.0-Math.bayesian_add(alert.severity, alert.confidence), 10))  #SHOW NUMBER OF NINES
            alert.details.url = alert.details.page_url
            example = alert.details.example
            # sample (NOT e) SO THE EXCEPTION NAME USED BY THE OUTER HANDLER IS NOT SHADOWED
            for sample in alert.details.tests.example + [example]:
                if sample.push_date_min:
                    sample.push_date_max = (2 * sample.push_date) - sample.push_date_min
                    sample.date_range = (datetime.utcnow()-CNV.milli2datetime(sample.push_date_min)).total_seconds()/(24*60*60)  #REQUIRED FOR DATAZILLA B2G CHART REFERENCE
                    sample.date_range = nvl(nvl(*[v for v in (7, 30, 60) if v > sample.date_range]), 90)  #PICK FIRST v > CURRENT VALUE

            subject = expand_template(CNV.JSON2object(alert.email_subject), alert)
            body.append(expand_template(CNV.JSON2object(alert.email_template), alert))
            body = "".join(body)+FOOTER

            if debug:
                Log.note("EMAIL: {{email}}", {"email": body})

            if len(body) > MAX_EMAIL_LENGTH:
                Log.note("Truncated the email body")
                suffix = "... (has been truncated)"
                body = body[0:MAX_EMAIL_LENGTH - len(suffix)] + suffix   #keep it reasonable

            db.call("mail.send", (
                listeners, #to
                subject,
                body, #body
                None
            ))

            # STAMP ONLY THE ALERT THAT WAS JUST MAILED.  THE PREVIOUS VERSION
            # STAMPED EVERY SELECTED ALERT ON EACH PASS OF THIS LOOP, SO A FAILURE
            # ON A LATER ALERT LEFT UN-MAILED ALERTS MARKED AS SENT
            db.execute(
                "UPDATE alerts SET last_sent={{time}} WHERE {{where}}", {
                    "time": datetime.utcnow(),
                    "where": esfilter2sqlwhere(db, {"term": {"id": alert.alert_id}})
                })

    except Exception as e:
        Log.error("Could not send alerts", e)
def page_threshold_limit(db, debug):
    """
    simplest of rules to test the dataflow from test_run, to alert, to email
    may prove slightly useful also!
    #point out any pages that are breaking human-set threshold limits
    """
    # db: DATABASE HANDLE USED FOR EVERY QUERY/INSERT/UPDATE IN THE try BLOCK
    # debug: COPIED ONTO db.debug, AND READ AGAIN AT THE VERY END OF THE FUNCTION
    db.debug = debug

    try:
        # CALCULATE HOW FAR BACK TO LOOK
        # STARTS FROM THE last_run RECORDED FOR THIS REASON; nvl PRESUMABLY FALLS
        # BACK TO THE CURRENT UTC TIME WHEN last_run IS MISSING
        lasttime = db.query("SELECT last_run, description FROM reasons WHERE code={{type}}", {"type": REASON})[0]
        lasttime = nvl(lasttime.last_run, datetime.utcnow())
        # NOTE(review): LOOK_BACK IS *ADDED* HERE.  IF LOOK_BACK IS A POSITIVE
        # INTERVAL THIS MOVES min_date FORWARD, NOT BACK - CONFIRM THE SIGN
        min_date = lasttime + LOOK_BACK

        # FIND ALL PAGES THAT HAVE LIMITS TO TEST
        # BRING BACK ONES THAT BREAK LIMITS
        # BUT DO NOT ALREADY HAVE AN ALERTS EXISTING
        # NOTE(review): THE STATUS LITERAL IN THE WHERE CLAUSE BELOW IS SPELLED
        # 'obsol11ete', WHILE THE UPDATE STATEMENTS FURTHER DOWN WRITE 'obsolete'.
        # AS WRITTEN, ONLY ROWS WITH NO MATCHING ALERT (m.id IS NULL) CAN MATCH
        # UNLESS SOME ALERT REALLY CARRIES THAT SPELLING.  CORRECTING IT WOULD ALSO
        # ACTIVATE THE SECOND LOOP BELOW - REVIEW BOTH TOGETHER
        pages = db.query(
            """
            SELECT
                t.id tdad_id,
                t.n_replicates,
                t.mean,
                t.std,
                h.threshold,
                h.severity,
                h.reason,
                m.id alert_id
            FROM
                alert_page_thresholds h
            JOIN
                test_data_all_dimensions t ON t.page_id=h.page
            LEFT JOIN
                alerts m on m.tdad_id=t.test_run_id AND m.reason={{type}}
            WHERE
                h.threshold<t.mean AND
                t.push_date>{{min_date}} AND
                (m.id IS NULL OR m.status='obsol11ete')
        """,
            {"type": REASON, "min_date": min_date},
        )

        # FOR EACH PAGE THAT BREAKS LIMITS
        # INSERT A NEW ALERT FOR ROWS THAT HAVE NO ALERT YET
        # NOTE(review): break (NOT continue) ENDS THE LOOP AT THE FIRST ROW THAT
        # ALREADY HAS AN ALERT, SO ANY ROWS AFTER IT ARE NOT INSERTED
        # NOTE(review): THE == None / != None COMPARISONS ARE EQUALITY TESTS; IF
        # QUERY ROWS EXPOSE MISSING VALUES THROUGH A NULL-OBJECT WRAPPER, `is None`
        # WOULD BEHAVE DIFFERENTLY - CONFIRM BEFORE CHANGING
        for page in pages:
            if page.alert_id != None:
                break

            alert = {
                "id": SQL("util.newid()"),
                "status": "new",
                "create_time": datetime.utcnow(),
                "last_updated": datetime.utcnow(),
                "tdad_id": page.tdad_id,
                "reason": REASON,
                "details": CNV.object2JSON(
                    {"expected": float(page.threshold), "actual": float(page.mean), "reason": page.reason}
                ),
                "severity": page.severity,
                "confidence": 1.0  # USING NORMAL DIST ASSUMPTION WE CAN ADJUST
                # CONFIDENCE EVEN BEFORE THRESHOLD IS HIT!
                # FOR NOW WE KEEP IT SIMPLE
            }

            db.insert("alerts", alert)

        # ROWS THAT ALREADY HAVE AN ALERT: PLACEHOLDER ONLY
        # NOTE(review): THIS LOOP ALSO USES break, SO IT STOPS AT THE FIRST ROW
        # WITHOUT AN ALERT.  THE update CALL PASSES TWO ARGUMENTS, WHILE THE OTHER
        # update CALL IN THIS FUNCTION PASSES (table, where, values); THE ORIGINAL
        # COMMENT SAYS IT IS MEANT TO ERROR
        for page in pages:
            if page.alert_id == None:
                break
            db.update("alerts", None)  # ERROR FOR NOW

        # OBSOLETE THE ALERTS THAT SHOULD NO LONGER GET SENT
        # THESE ARE ALERTS OF THIS REASON WHOSE TEST MEAN IS NOW AT OR BELOW THE
        # PAGE THRESHOLD, FOR DATA PUSHED AFTER min_date
        obsolete = db.query(
            """
            SELECT
                m.id,
                m.tdad_id
            FROM
                alerts m
            JOIN
                test_data_all_dimensions t ON m.tdad_id=t.id
            JOIN
                alert_page_thresholds h on t.page_id=h.page
            WHERE
                m.reason={{reason}} AND
                h.threshold>=t.mean AND
                t.push_date>{{time}}
        """,
            {"reason": REASON, "time": min_date},
        )

        # ONLY ISSUE THE UPDATE WHEN THE QUERY ABOVE RETURNED SOMETHING
        if obsolete:
            db.execute(
                "UPDATE alerts SET status='obsolete' WHERE {{where}}",
                {"where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(obsolete, "id")}})},
            )

        # RECORD THIS RUN SO THE NEXT CALL STARTS FROM HERE
        db.execute(
            "UPDATE reasons SET last_run={{now}} WHERE code={{reason}}", {"now": datetime.utcnow(), "reason": REASON}
        )

    except Exception, e:

        Log.error("Could not perform threshold comparisons", e)
        # NOTE(review): FROM HERE TO THE END OF THE FUNCTION THE CODE USES NAMES
        # THAT THIS FUNCTION NEVER ASSIGNS (a, curr, alerts_db, obsolete_alerts,
        # all_touched).  IT READS LIKE THE TAIL OF A DIFFERENT ROUTINE.  UNLESS
        # THOSE NAMES EXIST AT MODULE LEVEL, REACHING THESE LINES RAISES NameError
        # - CONFIRM WHETHER THEY BELONG HERE
        if a == None:
            Log.error("Programmer error, changed_alerts must have {{key_value}}", {"key_value": curr.tdad.id})

        # WRITE a OVER THE STORED ALERT WHEN SEVERITY, CONFIDENCE OR REASON DIFFER
        if significant_difference(curr.severity, a.severity) or \
                significant_difference(curr.confidence, a.confidence) or \
                        curr.reason != a.reason:
            curr.last_updated = NOW
            alerts_db.update("alerts", {"id": curr.id}, a)

    #OBSOLETE THE ALERTS THAT ARE NO LONGER VALID
    # THE FILTER SKIPS ALERTS WHOSE STATUS IS ALREADY 'obsolete'
    if obsolete_alerts:
        alerts_db.execute("UPDATE alerts SET status='obsolete' WHERE {{where}}", {
            "where": esfilter2sqlwhere(
                alerts_db,
                {"and": [
                    {"terms": {"id": obsolete_alerts.id}},
                    {"not": {"term": {"status": "obsolete"}}}
                ]}
            )
        })

    # STAMP last_run FOR THIS REASON, THIS TIME THROUGH alerts_db
    alerts_db.execute("UPDATE reasons SET last_run={{now}} WHERE {{where}}", {
        "now": NOW,
        "where": esfilter2sqlwhere(alerts_db, {"term": {"code": REASON}})
    })

    alerts_db.flush()

    if debug:
        Log.note("Marking {{num}} test_run_id as 'done'", {"num": len(all_touched)})
def talos_alert_revision(settings):
    """
    ROLL THE SUSTAINED-MEDIAN ALERTS OF EACH REVISION UP INTO ONE REVISION-LEVEL ALERT

    Reads the non-obsolete alerts with reason talos_sustained_median.REASON
    created since NOW - LOOK_BACK, builds one summary alert (reason REASON)
    per revision, then:
      * inserts summaries whose revision has no stored alert yet,
      * links sustained alerts to their revision-level alert in `hierarchy`,
      * updates stored alerts whose status/severity/confidence changed,
        leaving alerts that already have a solution untouched,
      * marks stored alerts with no current summary as obsolete.

    :param settings: configuration; reads settings.alerts (must not be None),
        settings.param.debug, settings.query["from"] and
        settings.dimension.filename
    :return: None
    """
    assert settings.alerts != None
    # NOTE(review): debug IS SET ON settings.db, BUT THE CONNECTION OPENED BELOW
    # IS BUILT FROM settings.alerts - CONFIRM db.debug REALLY PICKS THIS UP
    settings.db.debug = settings.param.debug
    with DB(settings.alerts) as db:
        with ESQuery(ElasticSearch(settings.query["from"])) as esq:

            dbq = DBQuery(db)
            esq.addDimension(CNV.JSON2object(File(settings.dimension.filename).read()))

            #TODO: REMOVE, LEAVE IN DB
            if db.debug:
                db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
                    "template": CNV.object2JSON(TEMPLATE),
                    "subject": CNV.object2JSON(SUBJECT),
                    "reason": REASON
                })
                db.flush()

            #EXISTING SUSTAINED EXCEPTIONS
            existing_sustained_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": talos_sustained_median.REASON}},
                    {"not": {"term": {"status": "obsolete"}}},
                    {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                ]}
            })

            tests = Q.index(existing_sustained_alerts, ["revision", "details.Talos.Test"])

            #EXISTING REVISION-LEVEL ALERTS
            # NOTE(review): THE "or" LIST MIXES A REVISION MATCH WITH THE SUSTAINED
            # REASON, STATUS 'obsolete' AND A DATE RANGE AS SEPARATE ALTERNATIVES;
            # THE LAST THREE LOOK LIKE THEY ONCE FORMED A NESTED CLAUSE - CONFIRM
            old_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": REASON}},
                    {"or": [
                        {"terms": {"revision": set(existing_sustained_alerts.revision)}},

                        {"term": {"reason": talos_sustained_median.REASON}},
                        {"term": {"status": "obsolete"}},
                        {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                    ]}
                ]}
            })
            old_alerts = Q.unique_index(old_alerts, "revision")

            #SUMMARIZE
            known_alerts = StructList()
            for revision in set(existing_sustained_alerts.revision):
                #FIND TOTAL TDAD FOR EACH INTERESTING REVISION
                # NOTE(review): "terms" IS GIVEN A SINGLE revision HERE, NOT A
                # COLLECTION - CONFIRM THE QUERY LAYER ACCEPTS THAT
                total_tests = esq.query({
                    "from": "talos",
                    "select": {"name": "count", "aggregate": "count"},
                    "where": {"and":[
                        {"terms": {"Talos.Revision": revision}}
                    ]}
                })
                total_exceptions = tests[(revision, )]  # FILTER BY revision

                parts = StructList()
                for g, exceptions in Q.groupby(total_exceptions, ["details.Talos.Test"]):
                    # SKIP EMPTY GROUPS BEFORE DOING ANY WORK ON THEM
                    num_except = len(exceptions)
                    if num_except == 0:
                        continue

                    # LAST ELEMENT AFTER SORTING BY confidence, THEN diff_percent
                    worst_in_test = Q.sort(exceptions, ["confidence", "details.diff_percent"]).last()
                    example = worst_in_test.details
                    # ADD SOME DATAZILLA SPECIFIC URL PARAMETERS
                    branch = example.Talos.Branch.replace("-Non-PGO", "")
                    example.tbpl.url.branch = TBPL_PATH.get(branch, branch)
                    example.mercurial.url.branch = MECURIAL_PATH.get(branch, branch)
                    example.datazilla.url.branch = example.Talos.Branch #+ ("" if worst_in_test.Talos.Branch.pgo else "-Non-PGO")
                    example.datazilla.url.x86 = "true" if example.Talos.Platform == "x86" else "false"
                    example.datazilla.url.x86_64 = "true" if example.Talos.Platform == "x86_64" else "false"
                    example.datazilla.url.stop = nvl(example.push_date_max, (2*example.push_date) - example.push_date_min)

                    part = {
                        "test": g.details.Talos.Test,
                        "num_exceptions": num_except,
                        "num_tests": total_tests,
                        "confidence": worst_in_test.confidence,
                        "example": example
                    }
                    parts.append(part)

                # HIGHEST CONFIDENCE FIRST; ITS EXAMPLE REPRESENTS THE WHOLE REVISION
                parts = Q.sort(parts, [{"field": "confidence", "sort": -1}])
                worst_in_revision = parts[0].example

                known_alerts.append({
                    "status": "new",
                    "create_time": CNV.milli2datetime(worst_in_revision.push_date),
                    "reason": REASON,
                    "revision": revision,
                    "tdad_id": revision,
                    "details": {
                        "revision": revision,
                        "total_tests": total_tests,
                        "total_exceptions": len(total_exceptions),
                        "tests": parts,
                        "example": worst_in_revision
                    },
                    "severity": SEVERITY,
                    "confidence": worst_in_revision.result.confidence
                })

            known_alerts = Q.unique_index(known_alerts, "revision")

            #NEW ALERTS, JUST INSERT
            new_alerts = known_alerts - old_alerts
            if new_alerts:
                for revision in new_alerts:
                    revision.id = SQL("util.newid()")
                    revision.last_updated = NOW
                db.insert_list("alerts", new_alerts)

            #SHOW SUSTAINED ALERTS ARE COVERED
            # ONLY SUSTAINED ALERTS WITH NO hierarchy ROW YET (h.parent MISSING) ARE LINKED
            db.execute("""
                INSERT INTO hierarchy (parent, child)
                SELECT
                    r.id parent,
                    p.id child
                FROM
                    alerts p
                LEFT JOIN
                    hierarchy h on h.child=p.id
                LEFT JOIN
                    alerts r on r.revision=p.revision AND r.reason={{parent_reason}}
                WHERE
                    {{where}}
            """, {
                "where": esfilter2sqlwhere(db, {"and": [
                    {"term": {"p.reason": talos_sustained_median.REASON}},
                    {"terms": {"p.revision": Q.select(existing_sustained_alerts, "revision")}},
                    {"missing": "h.parent"}
                ]}),
                "parent_reason": REASON
            })

            #CURRENT ALERTS, UPDATE IF DIFFERENT
            for known_alert in known_alerts & old_alerts:
                old_alert = old_alerts[known_alert]
                # THE SOLUTION LIVES ON THE STORED ALERT.  known_alert IS THE SUMMARY
                # BUILT ABOVE AND HAS NO solution KEY, SO TESTING IT NEVER SKIPPED
                # ANYTHING AND SOLVED ALERTS WERE OVERWRITTEN
                if len(nvl(old_alert.solution, "").strip()) != 0:
                    continue  # DO NOT TOUCH SOLVED ALERTS

                if old_alert.status == 'obsolete' or significant_difference(known_alert.severity, old_alert.severity) or significant_difference(known_alert.confidence, old_alert.confidence):
                    known_alert.last_updated = NOW
                    db.update("alerts", {"id": old_alert.id}, known_alert)

            #OLD ALERTS, OBSOLETE
            for old_alert in old_alerts - known_alerts:
                if old_alert.status == 'obsolete':
                    continue
                db.update("alerts", {"id": old_alert.id}, {"status": "obsolete", "last_updated": NOW, "details":None})