def help_send_alerts(self, to_list):
        try:
            self.setup(to_list)

            ########################################################################
            # TEST
            ########################################################################
            send_alerts(
                settings=struct.wrap({"param": {"debug": True}}),
                db=self.db
            )

            ########################################################################
            # VERIFY
            ########################################################################
            emails = self.get_new_emails() # id, to, body

            if len(to_list) == 0:
                assert len(emails) == 0
                return

            #VERIFY ONE MAIL SENT
            assert len(emails) == 1
            #VERIFY to MATCHES WHAT WAS PASSED TO THIS FUNCTION
            assert set(emails[0].to) == set(to_list), "mail.delivery not matching what's send"

            #VERIFY last_sent IS WRITTEN
            alert_state = self.db.query("""
                SELECT
                    id
                FROM
                    alerts
                WHERE
                    reason={{reason}} AND
                    last_sent>={{send_time}}
            """, {
                "reason": self.reason,
                "send_time": self.now
            })
            expected_marked = set([d.id for d in self.test_data if CNV.JSON2object(d.details).expect == 'pass'])
            actual_marked = set(Q.select(alert_state, "id"))
            assert expected_marked == actual_marked, expand_template(
                "Expecting only id in {{expected}}, but instead got {{actual}}", {
                    "expected": str(expected_marked),
                    "actual": str(actual_marked)
                })

            #VERIFY BODY HAS THE CORRECT ALERTS
            expecting_alerts = set([d.id for d in map(lambda d: CNV.JSON2object(d.details), self.test_data) if d.expect == 'pass'])
            actual_alerts_sent = set([
                CNV.value2int(between(b, ">>>>", "<<<<"))
                for b in emails[0].body.split(dzAlerts.daemons.alert.SEPARATOR)
                if CNV.value2int(between(b, ">>>>", "<<<<")) != None
            ])
            assert expecting_alerts == actual_alerts_sent
        except Exception, e:
            Log.error("Test failure", e)
def main():
    settings = startup.read_settings()
    Log.start(settings.debug)
    try:
        for repo in settings.param.repos:
            with DB(settings.database) as db:
                try:
                    pull_repo(repo)

                    #GET LATEST DATE
                    existing_range = db.query("""
                        SELECT
                            max(`date`) `max`,
                            min(`date`) `min`,
                            min(revision) min_rev,
                            max(revision) max_rev
                        FROM
                            changesets
                        WHERE
                            repo={{repo}}
                    """, {"repo": repo.name})[0]

                    ranges = struct.wrap([
                        {"min": nvl(existing_range.max, CNV.milli2datetime(0)) + timedelta(0, 1)},
                        {"max": existing_range.min}
                    ])

                    for r in ranges:
                        for g, docs in Q.groupby(get_changesets(date_range=r, repo=repo), size=100):
                            for doc in docs:
                                doc.file_changes = None
                                doc.file_adds = None
                                doc.file_dels = None
                                doc.description = doc.description[0:16000]

                            db.insert_list("changesets", docs)
                            db.flush()

                    missing_revisions = sql.find_holes(db, "changesets", "revision", {"term":{"repo":repo.name}}, {"min": 0, "max": existing_range.max_rev + 1})
                    for _range in missing_revisions:
                        for g, docs in Q.groupby(get_changesets(revision_range=_range, repo=repo), size=100):
                            for doc in docs:
                                doc.file_changes = None
                                doc.file_adds = None
                                doc.file_dels = None
                                doc.description = doc.description[0:16000]

                            db.insert_list("changesets", docs)
                            db.flush()



                except Exception, e:
                    Log.warning("Failure to pull from {{repo.name}}", {"repo":repo}, e)
    finally:
        Log.stop()
def test_1(settings):
    test_data1 = struct.wrap({
        "header": ("date", "count", "mean-std", "mean", "mean+std", "reject"),
        "rows": [
            ("2013-Apr-05 13:55:00", "23", "655.048136994614", "668.5652173913044", "682.0822977879948"),
            ("2013-Apr-05 13:59:00", "23", "657.8717192954238", "673.3478260869565", "688.8239328784892"),
            ("2013-Apr-05 14:05:00", "23", "658.3247270429598", "673", "687.6752729570402"),
            ("2013-Apr-05 14:08:00", "23", "658.5476631609771", "673.6521739130435", "688.7566846651099"),
            ("2013-Apr-05 14:16:00", "23", "653.2311994952266", "666.1739130434783", "679.1166265917299"),
            ("2013-Apr-05 14:26:00", "23", "659.5613845589426", "671.8260869565217", "684.0907893541009"),
            ("2013-Apr-05 14:42:00", "23", "662.3517791831357", "677.1739130434783", "691.9960469038208"),
            ("2013-Apr-05 15:26:00", "23", "659.8270045518033", "672", "684.1729954481967"),
            ("2013-Apr-05 15:30:00", "23", "659.4023663187861", "674", "688.5976336812139"),
            ("2013-Apr-05 15:32:00", "23", "652.8643631817508", "666.9565217391304", "681.0486802965099"),
            ("2013-Apr-05 15:35:00", "23", "661.6037178485499", "675.1739130434783", "688.7441082384066"),
            ("2013-Apr-05 15:39:00", "23", "658.0124378440726", "670.1304347826087", "682.2484317211449"),
            ("2013-Apr-05 16:20:00", "46", "655.9645219644624", "667.4782608695652", "678.9919997746681"),
            ("2013-Apr-05 16:30:00", "23", "660.2572506418051", "671.8695652173913", "683.4818797929775"),
            ("2013-Apr-05 16:31:00", "23", "661.011102554583", "673.4347826086956", "685.8584626628083"),
            ("2013-Apr-05 16:55:00", "23", "655.9407699325201", "671.304347826087", "686.6679257196539"),
            ("2013-Apr-05 17:07:00", "23", "657.6412277100247", "667.5217391304348", "677.4022505508448"),
            #        ("2013-Apr-05 17:12:00", "23", "598.3432138277318", "617.7391304347826", "637.1350470418334"),   # <--DIP IN DATA
            ("2013-Apr-05 17:23:00", "23", "801.0537973113723", "822.1739130434783", "843.2940287755843", 1)  # <--SPIKE IN DATA
        ]
    })
    test_data1 = [
        struct.wrap({
            "timestamp": CNV.datetime2unix(CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S")),
            "datetime": CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S"),
            "count": int(t.count),
            "mean": float(t.mean),
            "variance": pow(float(t["mean+std"]) - float(t.mean), 2),
            "reject": t.reject
        })
        for t in CNV.table2list(test_data1.header, test_data1.rows)
    ]

    with DB(settings.perftest) as db:
        tester = test_alert_exception(db)
        tester.test_alert_generated(settings, test_data1)
def not_test_2(settings):
    """
    THIS WAS TESTING FOR A DECREASE IN THE MEAN, BUT THE CURRENT CODE IGNORES THOSE
    """
    test_data2 = struct.wrap({
        "header": ("timestamp", "mean", "std", "h0_rejected", "count"),
        "rows": [
            (1366388389, 295.36, 32.89741631, 0, 25),
            (1366387915, 307.92, 32.86198412, 0, 25),
            (1366390777, 309, 41.22802445, 0, 25),
            (1366398771, 309.24, 34.18488945, 0, 25),
            (1366401499, 308.2, 30.36170834, 0, 25),
            (1366412504, 192.8, 46.27634385, 1, 25), # Should be an alert
            (1366421699, 298.04, 29.09249617, 0, 25),
            (1366433920, 324.52, 28.13378752, 0, 25),
            (1366445744, 302.2, 28.19131072, 0, 25),
            (1366455408, 369.96, 31.25363979, 0, 25),
            (1366474119, 313.12, 33.66541252, 0, 25),
            (1366483789, 369.96, 30.81460693, 0, 25),
            (1366498412, 311.76, 36.02462121, 0, 25),
            (1366507773, 291.08, 27.86562996, 0, 25)
        ]
    })
    test_data2 = [
        struct.wrap({
            "timestamp": t.timestamp,
            "datetime": CNV.unix2datetime(t.timestamp),
            "count": t.count,
            "mean": t.mean,
            "variance": pow(t.std, 2),
            "reject": t.h0_rejected
        })
        for t in CNV.table2list(test_data2.header, test_data2.rows)
    ]

    with DB(settings.perftest) as db:
        tester = test_alert_exception(db)
        tester.test_alert_generated(test_data2)
    def _setup(self):
        uid = self.db.query("SELECT util.newid() uid FROM DUAL")[0].uid

        ## VERFIY THE alert_reason EXISTS
        exists = self.db.query("""
            SELECT
                count(1) num
            FROM
                reasons
            WHERE
                code={{reason}}
            """,
                               {"reason": REASON}
        )[0].num
        if exists == 0:
            Log.error("Expecting the database to have an alert_reason={{reason}}", {"reason": REASON})

        ## MAKE A 'PAGE' TO TEST
        self.db.execute("DELETE FROM pages")
        self.db.insert("pages", {
            "test_id": 0,
            "url": self.url
        })
        self.page_id = self.db.query("SELECT id FROM pages")[0].id

        ## ADD A THRESHOLD TO TEST WITH
        self.db.execute("""
            INSERT INTO alert_page_thresholds (
                id,
                page,
                threshold,
                severity,
                reason,
                time_added,
                contact
            ) VALUES (
                {{uid}},
                {{page_id}},
                {{threshold}},
                {{severity}},
                concat("(", {{url}}, ") for test"),
                now(),
                "*****@*****.**"
            )
            """, {
            "uid": uid,
            "url": self.url,
            "page_id": self.page_id,
            "severity": self.severity,
            "threshold": 800
        })

        ## ENSURE THERE ARE NO ALERTS IN DB
        self.db.execute("DELETE FROM alerts WHERE reason={{reason}}", {"reason": REASON})

        ## diff_time IS REQUIRED TO TRANSLATE THE TEST DATE DATES TO SOMETHING MORE CURRENT
        now_time = CNV.datetime2unix(datetime.utcnow())
        max_time = max([CNV.datetime2unix(CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S")) for t in CNV.table2list(self.test_data.header, self.test_data.rows)])
        diff_time = now_time - max_time

        ## INSERT THE TEST RESULTS
        for t in CNV.table2list(self.test_data.header, self.test_data.rows):
            time = CNV.datetime2unix(CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S"))
            time += diff_time

            self.db.insert("test_data_all_dimensions", {
                "id": SQL("util.newid()"),
                "test_run_id": SQL("util.newid()"),
                "product_id": 0,
                "operating_system_id": 0,
                "test_id": 0,
                "page_id": self.page_id,
                "date_received": time,
                "revision": "ba928cbd5191",
                "product": "Firefox",
                "branch": "Mozilla-Inbound",
                "branch_version": "23.0a1",
                "operating_system_name": "mac",
                "operating_system_version": "OS X 10.8",
                "processor": "x86_64",
                "build_type": "opt",
                "machine_name": "talos-mtnlion-r5-049",
                "pushlog_id": 19998363,
                "push_date": time,
                "test_name": "tp5o",
                "page_url": self.url,
                "mean": float(t.mean),
                "std": float(t["mean+std"]) - float(t.mean),
                "h0_rejected": 0,
                "p": None,
                "n_replicates": t.count,
                "fdr": 0,
                "trend_mean": None,
                "trend_std": None,
                "test_evaluation": 0,
                "status": 1
            })
Example #6
0
def send_alerts(settings, db):
    """
    BLINDLY SENDS ALERTS FROM THE ALERTS TABLE, ASSUMING ALL HAVE THE SAME STRUCTURE.
    """
    debug = settings.param.debug
    db.debug = debug

    #TODO: REMOVE, LEAVE IN DB
    if db.debug:
        db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
            "template": CNV.object2JSON(TEMPLATE),
            "subject": CNV.object2JSON(SUBJECT),
            "reason": REASON
        })
        db.flush()

    try:
        new_alerts = db.query("""
            SELECT
                a.id alert_id,
                a.reason,
                r.description,
                a.details,
                a.severity,
                a.confidence,
                a.revision,
                r.email_template,
                r.email_subject
            FROM
                alerts a
            JOIN
                reasons r on r.code = a.reason
            WHERE
                a.last_sent IS NULL AND
                a.status <> 'obsolete' AND
                math.bayesian_add(a.severity, a.confidence) > {{alert_limit}} AND
                a.solution IS NULL AND
                a.reason in {{reasons}} AND
                a.create_time > {{min_time}}
            ORDER BY
                math.bayesian_add(a.severity, a.confidence) DESC,
                json.number(left(details, 65000), "diff_percent") DESC
            LIMIT
                10
        """, {
            "last_sent": datetime.utcnow() - RESEND_AFTER,
            "alert_limit": ALERT_LIMIT - EPSILON,
            "min_time": datetime.utcnow()-LOOK_BACK,
            "reasons": SQL("("+", ".join(db.quote_value(v) for v in SEND_REASONS)+")")
        })

        if not new_alerts:
            if debug:
                Log.note("Nothing important to email")
            return

        for alert in new_alerts:
            #poor souls that signed up for emails
            listeners = ";".join(db.query("SELECT email FROM listeners WHERE reason={{reason}}", {"reason": alert.reason}).email)

            body = [HEADER]
            if alert.confidence >= 1:
                alert.confidence = 0.999999

            alert.details = CNV.JSON2object(alert.details)
            try:
                alert.revision = CNV.JSON2object(alert.revision)
            except Exception, e:
                pass
            alert.score = str(-log(1.0-Math.bayesian_add(alert.severity, alert.confidence), 10))  #SHOW NUMBER OF NINES
            alert.details.url = alert.details.page_url
            example = alert.details.example
            for e in alert.details.tests.example + [example]:
                if e.push_date_min:
                    e.push_date_max = (2 * e.push_date) - e.push_date_min
                    e.date_range = (datetime.utcnow()-CNV.milli2datetime(e.push_date_min)).total_seconds()/(24*60*60)  #REQUIRED FOR DATAZILLA B2G CHART REFERENCE
                    e.date_range = nvl(nvl(*[v for v in (7, 30, 60) if v > e.date_range]), 90)  #PICK FIRST v > CURRENT VALUE

            subject = expand_template(CNV.JSON2object(alert.email_subject), alert)
            body.append(expand_template(CNV.JSON2object(alert.email_template), alert))
            body = "".join(body)+FOOTER

            if debug:
                Log.note("EMAIL: {{email}}", {"email": body})

            if len(body) > MAX_EMAIL_LENGTH:
                Log.note("Truncated the email body")
                suffix = "... (has been truncated)"
                body = body[0:MAX_EMAIL_LENGTH - len(suffix)] + suffix   #keep it reasonable

            db.call("mail.send", (
                listeners, #to
                subject,
                body, #body
                None
            ))

            #I HOPE I CAN SEND ARRAYS OF NUMBERS
            db.execute(
                "UPDATE alerts SET last_sent={{time}} WHERE {{where}}", {
                    "time": datetime.utcnow(),
                    "where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(new_alerts, "alert_id")}})
                })

    except Exception, e:
        Log.error("Could not send alerts", e)
def get_changesets(date_range=None, revision_range=None, repo=None):
    if date_range is not None:
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(CNV.datetime2unix(date_range.min)) + " 0"
        else:
            if date_range.min == None:
                drange = "<" + unicode(CNV.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(CNV.datetime2unix(date_range.min)) + " 0 to " + unicode(
                    CNV.datetime2unix(date_range.max) - 1) + " 0"


    #GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",   #TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]

    if date_range is not None:
        args.extend(["--date", drange])
    elif revision_range is not None:
        args.extend(["-r", str(revision_range.min)+":"+str(revision_range.max)])

    proc = subprocess.Popen(
        args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=-1
    )

    def iterator():
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error("Unable to pull hg log: return code {{return_code}}", {
                                "return_code": proc.returncode
                            })
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)


                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (
                    date,
                    node,
                    rev,
                    author,
                    branches,
                    files,
                    file_adds,
                    file_dels,
                    p1rev,
                    p1node,
                    parents,
                    children,
                    tags,
                    desc
                ) = (CNV.latin12unicode(urllib.unquote(c)) for c in line.split("\t"))

                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repo": repo.name,
                    "date": CNV.unix2datetime(CNV.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    "parents": set(parents.split("\n")) - {""} | {p1rev+":"+p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = ElasticSearch.scrub(doc)
                yield doc
        except Exception, e:
            if isinstance(e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})", {
                    "message": line
                }, e)


            Log.error("Problem iterating through log", e)
    def iterator():
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error("Unable to pull hg log: return code {{return_code}}", {
                                "return_code": proc.returncode
                            })
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)


                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (
                    date,
                    node,
                    rev,
                    author,
                    branches,
                    files,
                    file_adds,
                    file_dels,
                    p1rev,
                    p1node,
                    parents,
                    children,
                    tags,
                    desc
                ) = (CNV.latin12unicode(urllib.unquote(c)) for c in line.split("\t"))

                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repo": repo.name,
                    "date": CNV.unix2datetime(CNV.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    "parents": set(parents.split("\n")) - {""} | {p1rev+":"+p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = ElasticSearch.scrub(doc)
                yield doc
        except Exception, e:
            if isinstance(e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})", {
                    "message": line
                }, e)


            Log.error("Problem iterating through log", e)
    def insert_test_results(self, test_data):
        ## diff_time IS REQUIRED TO TRANSLATE THE TEST DATE DATES TO SOMETHING MORE CURRENT
        now_time = CNV.datetime2unix(datetime.utcnow())
        max_time = max(Q.select(test_data, "timestamp"))
        diff_time = now_time - max_time

        ## INSERT THE TEST RESULTS
        for t in test_data:
            time = t.timestamp
            time += diff_time

            self.db.insert("test_data_all_dimensions", {
                "id": SQL("util.newid()"),
                "test_run_id": SQL("util.newid()"),
                "product_id": 0,
                "operating_system_id": 0,
                "test_id": 0,
                "page_id": self.page_id,
                "date_received": time,
                "revision": "ba928cbd5191",
                "product": "Firefox",
                "branch": "Mozilla-Inbound",
                "branch_version": "23.0a1",
                "operating_system_name": "mac",
                "operating_system_version": "OS X 10.8",
                "processor": "x86_64",
                "build_type": "opt",
                "machine_name": "talos-mtnlion-r5-049",
                "pushlog_id": 19998363,
                "push_date": time,
                "test_name": "tp5o",
                "page_url": self.url,
                "mean": float(t.mean),
                "std": sqrt(t.variance),
                "h0_rejected": 0,
                "p": None,
                "n_replicates": t.count,
                "fdr": 0,
                "trend_mean": None,
                "trend_std": None,
                "test_evaluation": 0,
                "status": 1
            })

        self.db.flush()
        self.db.execute("""
            INSERT INTO objectstore (id, test_run_id, date_loaded, processed_exception, branch, json_blob)
            SELECT
                {{id}},
                test_run_id,
                {{now}},
                'complete',
                branch,
                '{}'
            FROM
                test_data_all_dimensions
            GROUP BY
                test_run_id
        """, {
            "id": SQL("util.newid()"),
            "now": CNV.datetime2unix(datetime.utcnow())
        })
def alert_sustained_median(settings, qb, alerts_db):
    """
    find single points that deviate from the trend
    """
    # OBJECTSTORE = settings.objectstore.schema + ".objectstore"
    # TDAD = settings.perftest.schema + ".test_data_all_dimensions"
    TDAD = settings.query["from"]
    PUSH_DATE = "datazilla.date_loaded"

    debug = nvl(settings.param.debug, DEBUG)
    query = settings.query

    def is_bad(r):
        if settings.param.sustained_median.trigger < r.result.confidence:
            test_param = nvl(settings.param.test[literal_field(r.Talos.Test.name)], settings.param.suite[literal_field(r.Talos.Test.suite)])

            if test_param == None:
                return True

            if test_param.disable:
                return False

            if test_param.better == "higher":
                diff = -r.diff
            elif test_param.better == "lower":
                diff = r.diff
            else:
                diff = abs(r.diff)  # DEFAULT = ANY DIRECTION IS BAD

            if test_param.min_regression:
                if unicode(test_param.min_regression.strip()[-1]) == "%":
                    min_diff = Math.abs(r.past_stats.mean * float(test_param.min_regression.strip()[:-1]) / 100.0)
                else:
                    min_diff = Math.abs(float(test_param.min_regression))
            else:
                min_diff = Math.abs(r.past_stats.mean * 0.01)

            if diff > min_diff:
                return True

        return False

    with Timer("pull combinations"):
        disabled_suites = [s for s, p in settings.param.suite.items() if p.disable]
        disabled_tests = [t for t, p in settings.param.test.items() if p.disable]

        temp = Query({
            "from": TDAD,
            "select": {"name": "min_push_date", "value": PUSH_DATE, "aggregate": "min"},
            "edges": query.edges,
            "where": {"and": [
                True if settings.args.restart else {"missing": {"field": settings.param.mark_complete}},
                {"exists": {"field": "result.test_name"}},
                {"range": {PUSH_DATE: {"gte": OLDEST_TS}}},
                {"not": {"terms": {"Talos.Test.suite": disabled_suites}}},
                {"not": {"terms": {"Talos.Test.name": disabled_tests}}}
                # {"term": {"testrun.suite": "cart"}},
                # {"term": {"result.test_name": "1-customize-enter.error.TART"}},
                # {"term": {"test_machine.osversion": "OS X 10.8"}}
                #FOR DEBUGGING SPECIFIC SERIES
                # {"term": {"test_machine.type": "hamachi"}},
                # {"term": {"test_machine.platform": "Gonk"}},
                # {"term": {"test_machine.os": "Firefox OS"}},
                # {"term": {"test_build.branch": "master"}},
                # {"term": {"testrun.suite": "communications/ftu"}},
                # {"term": {"result.test_name": "startup_time"}}
            ]},
            "limit": nvl(settings.param.combo_limit, 1000)
        }, qb)

        new_test_points = qb.query(temp)

    #BRING IN ALL NEEDED DATA
    if debug:
        Log.note("Pull all data for {{num}} groups:\n{{groups.name}}", {
            "num": len(new_test_points),
            "groups": query.edges
        })

    # all_min_date = Null
    all_touched = set()
    evaled_tests = set()
    alerts = []   # PUT ALL THE EXCEPTION ITEMS HERE
    for g, test_points in Q.groupby(new_test_points, query.edges):
        if not test_points.min_push_date:
            continue
        try:
            if settings.args.restart:
                first_sample = OLDEST_TS
            else:
                first_sample = MAX(MIN(test_points.min_push_date), OLDEST_TS)
            # FOR THIS g, HOW FAR BACK IN TIME MUST WE GO TO COVER OUR WINDOW_SIZE?
            first_in_window = qb.query({
                "select": {"name": "min_date", "value": "push_date", "aggregate": "min"},
                "from": {
                    "from": TDAD,
                    "select": {"name": "push_date", "value": PUSH_DATE},
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"lt": first_sample}}}
                    ]},
                    "sort": {"field": PUSH_DATE, "sort": -1},
                    "limit": settings.param.sustained_median.window_size * 2
                }
            })
            if len(first_in_window) > settings.param.sustained_median.window_size * 2:
                do_all = False
            else:
                do_all = True

            min_date = MIN(first_sample, first_in_window.min_date)

            #LOAD TEST RESULTS FROM DATABASE
            test_results = qb.query({
                "from": {
                    "from": "talos",
                    "select": [{"name": "push_date", "value": PUSH_DATE}] +
                              query.select +
                              query.edges,
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"gte": min_date}}}
                    ]},
                },
                "sort": "push_date"
            })

            Log.note("{{num}} test results found for {{group}} dating back no further than {{start_date}}", {
                "num": len(test_results),
                "group": g,
                "start_date": CNV.milli2datetime(min_date)
            })

            if debug:
                Log.note("Find sustained_median exceptions")

            #APPLY WINDOW FUNCTIONS
            stats = Q.run({
                "from": {
                    "from": test_results,
                    "where": {"exists": {"field": "value"}}
                },
                "window": [
                    {
                        # WE DO NOT WANT TO CONSIDER THE POINTS BEFORE FULL WINDOW SIZE
                        "name": "ignored",
                        "value": lambda r, i: False if do_all or i > settings.param.sustained_median.window_size else True
                    }, {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_min",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Min,
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    }, {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_max",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Max,
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    }, {
                        "name": "past_revision",
                        "value": lambda r, i, rows: rows[i - 1].Talos.Revision,
                        "sort": "push_date"
                    }, {
                        "name": "past_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    }, {
                        "name": "future_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    }, {
                        "name": "result",
                        "value": lambda r, i, rows: median_test(
                            rows[-settings.param.sustained_median.window_size + i:i:].value,
                            rows[i:settings.param.sustained_median.window_size + i:].value,
                            interpolate=False
                        ),
                        "sort": "push_date"
                    }, {
                        "name": "diff",
                        "value": lambda r: r.future_stats.mean - r.past_stats.mean
                    }, {
                        "name": "diff_percent",
                        "value": lambda r: (r.future_stats.mean - r.past_stats.mean) / r.past_stats.mean
                    }, {
                        "name": "is_diff",
                        "value": is_bad
                    }, {
                        #USE THIS TO FILL CONFIDENCE HOLES
                        #WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "future_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": "push_date"
                    }, {
                        #WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "past_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": {"value": "push_date", "sort": -1}
                    },
                ]
            })

            #PICK THE BEST SCORE FOR EACH is_diff==True REGION
            for g2, data in Q.groupby(stats, "is_diff", contiguous=True):
                if g2.is_diff:
                    best = Q.sort(data, ["result.confidence", "diff"]).last()
                    best["pass"] = True

            all_touched.update(Q.select(test_results, ["test_run_id", "Talos.Test"]))

            # TESTS THAT HAVE BEEN (RE)EVALUATED GIVEN THE NEW INFORMATION
            evaled_tests.update(Q.run({
                "from": test_results,
                "select": ["test_run_id", "Talos.Test"],
                "where": {"term": {"ignored": False}}
            }))

            File("test_values.txt").write(CNV.list2tab(Q.select(stats, [
                {"name": "push_date", "value": lambda x: CNV.datetime2string(CNV.milli2datetime(x.push_date), "%d-%b-%Y %H:%M:%S")},
                "value",
                {"name": "revision", "value": "Talos.Revision"},
                {"name": "confidence", "value": "result.confidence"},
                "pass"
            ])))

            #TESTS THAT HAVE SHOWN THEMSELVES TO BE EXCEPTIONAL
            new_exceptions = Q.filter(stats, {"term": {"pass": True}})
            for v in new_exceptions:
                if v.ignored:
                    continue
                alert = Struct(
                    status="new",
                    create_time=CNV.milli2datetime(v.push_date),
                    tdad_id={"test_run_id": v.test_run_id, "Talos": {"Test": v.Talos.Test}},
                    reason=REASON,
                    revision=v.Talos.Revision,
                    details=v,
                    severity=SEVERITY,
                    confidence=v.result.confidence
                )
                alerts.append(alert)

            if debug:
                Log.note("{{num}} new exceptions found", {"num": len(new_exceptions)})

        except Exception, e:
            Log.warning("Problem with alert identification, continue to log existing alerts and stop cleanly", e)
from dzAlerts.util.queries.query import Query
from dzAlerts.util.struct import nvl, StructList, literal_field, unwrap
from dzAlerts.util.sql.db import SQL
from dzAlerts.util.env.logs import Log
from dzAlerts.util.struct import Struct
from dzAlerts.util.queries import Q
from dzAlerts.util.sql.db import DB
from dzAlerts.util.times.timer import Timer


SEVERITY = 0.8              # THERE ARE MANY FALSE POSITIVES (0.99 == positive indicator, 0.5==not an indicator, 0.01 == negative indicator)
# MIN_CONFIDENCE = 0.9999
REASON = "talos_alert_sustained_median"     # name of the reason in alert_reason
NOW = datetime.utcnow()
MAX_AGE = timedelta(days=90)
OLDEST_TS = CNV.datetime2milli(NOW - MAX_AGE)

TEMPLATE = """<div><h3>{{score}} - {{reason}}</h3><br>
On page {{page_url}}<br>
<a href=\"https://tbpl.mozilla.org/?tree={{branch}}&rev={{revision}}\">TBPL</a><br>
<a href=\"https://hg.mozilla.org/rev/{{revision}}\">Mercurial</a><br>
<a href=\"https://bugzilla.mozilla.org/show_bug.cgi?id={{bug_id}}\">Bugzilla - {{bug_description}}</a><br>
<a href=\"https://datazilla.mozilla.org/?start={{push_date_min}}&stop={{push_date_max}}&product={{product}}&repository={{branch}}&os={{operating_system_name}}&os_version={{operating_system_version}}&test={{test_name}}&graph_search={{revision}}&error_bars=false&project=talos\">Datazilla</a><br>
<a href=\"http://people.mozilla.com/~klahnakoski/test/es/DZ-ShowPage.html#page={{page_url}}&sampleMax={{push_date}}000&sampleMin={{push_date_min}}000&branch={{branch}}\">Kyle's ES</a><br>
Raw data:  {{details}}
</div>"""

DEBUG = True


def alert_sustained_median(settings, qb, alerts_db):
def page_threshold_limit(db, debug):
    """
    simplest of rules to test the dataflow from test_run, to alert, to email
    may prove slightly useful also!
    #point out any pages that are breaking human-set threshold limits
    """
    db.debug = debug

    try:
        # CALCULATE HOW FAR BACK TO LOOK
        lasttime = db.query("SELECT last_run, description FROM reasons WHERE code={{type}}", {"type": REASON})[0]
        lasttime = nvl(lasttime.last_run, datetime.utcnow())
        min_date = lasttime + LOOK_BACK

        # FIND ALL PAGES THAT HAVE LIMITS TO TEST
        # BRING BACK ONES THAT BREAK LIMITS
        # BUT DO NOT ALREADY HAVE AN ALERTS EXISTING
        pages = db.query(
            """
            SELECT
                t.id tdad_id,
                t.n_replicates,
                t.mean,
                t.std,
                h.threshold,
                h.severity,
                h.reason,
                m.id alert_id
            FROM
                alert_page_thresholds h
            JOIN
                test_data_all_dimensions t ON t.page_id=h.page
            LEFT JOIN
                alerts m on m.tdad_id=t.test_run_id AND m.reason={{type}}
            WHERE
                h.threshold<t.mean AND
                t.push_date>{{min_date}} AND
                (m.id IS NULL OR m.status='obsol11ete')
        """,
            {"type": REASON, "min_date": min_date},
        )

        # FOR EACH PAGE THAT BREAKS LIMITS
        for page in pages:
            if page.alert_id != None:
                break

            alert = {
                "id": SQL("util.newid()"),
                "status": "new",
                "create_time": datetime.utcnow(),
                "last_updated": datetime.utcnow(),
                "tdad_id": page.tdad_id,
                "reason": REASON,
                "details": CNV.object2JSON(
                    {"expected": float(page.threshold), "actual": float(page.mean), "reason": page.reason}
                ),
                "severity": page.severity,
                "confidence": 1.0  # USING NORMAL DIST ASSUMPTION WE CAN ADJUST
                # CONFIDENCE EVEN BEFORE THRESHOLD IS HIT!
                # FOR NOW WE KEEP IT SIMPLE
            }

            db.insert("alerts", alert)

        for page in pages:
            if page.alert_id == None:
                break
            db.update("alerts", None)  # ERROR FOR NOW

        # OBSOLETE THE ALERTS THAT SHOULD NO LONGER GET SENT
        obsolete = db.query(
            """
            SELECT
                m.id,
                m.tdad_id
            FROM
                alerts m
            JOIN
                test_data_all_dimensions t ON m.tdad_id=t.id
            JOIN
                alert_page_thresholds h on t.page_id=h.page
            WHERE
                m.reason={{reason}} AND
                h.threshold>=t.mean AND
                t.push_date>{{time}}
        """,
            {"reason": REASON, "time": min_date},
        )

        if obsolete:
            db.execute(
                "UPDATE alerts SET status='obsolete' WHERE {{where}}",
                {"where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(obsolete, "id")}})},
            )

        db.execute(
            "UPDATE reasons SET last_run={{now}} WHERE code={{reason}}", {"now": datetime.utcnow(), "reason": REASON}
        )

    except Exception, e:

        Log.error("Could not perform threshold comparisons", e)
def talos_alert_revision(settings):
    assert settings.alerts != None
    settings.db.debug = settings.param.debug
    with DB(settings.alerts) as db:
        with ESQuery(ElasticSearch(settings.query["from"])) as esq:

            dbq = DBQuery(db)
            esq.addDimension(CNV.JSON2object(File(settings.dimension.filename).read()))

            #TODO: REMOVE, LEAVE IN DB
            if db.debug:
                db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
                    "template": CNV.object2JSON(TEMPLATE),
                    "subject": CNV.object2JSON(SUBJECT),
                    "reason": REASON
                })
                db.flush()

            #EXISTING SUSTAINED EXCEPTIONS
            existing_sustained_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": talos_sustained_median.REASON}},
                    {"not": {"term": {"status": "obsolete"}}},
                    {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                ]}
            })

            tests = Q.index(existing_sustained_alerts, ["revision", "details.Talos.Test"])

            #EXISTING REVISION-LEVEL ALERTS
            old_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": REASON}},
                    {"or": [
                        {"terms": {"revision": set(existing_sustained_alerts.revision)}},

                        {"term": {"reason": talos_sustained_median.REASON}},
                        {"term": {"status": "obsolete"}},
                        {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                    ]}
                ]}
            })
            old_alerts = Q.unique_index(old_alerts, "revision")

            #SUMMARIZE
            known_alerts = StructList()
            for revision in set(existing_sustained_alerts.revision):
            #FIND TOTAL TDAD FOR EACH INTERESTING REVISION
                total_tests = esq.query({
                    "from": "talos",
                    "select": {"name": "count", "aggregate": "count"},
                    "where": {"and":[
                        {"terms": {"Talos.Revision": revision}}
                    ]}
                })
                total_exceptions = tests[(revision, )]  # FILTER BY revision

                parts = StructList()
                for g, exceptions in Q.groupby(total_exceptions, ["details.Talos.Test"]):
                    worst_in_test = Q.sort(exceptions, ["confidence", "details.diff_percent"]).last()
                    example = worst_in_test.details
                    # ADD SOME DATAZILLA SPECIFIC URL PARAMETERS
                    branch = example.Talos.Branch.replace("-Non-PGO", "")
                    example.tbpl.url.branch = TBPL_PATH.get(branch, branch)
                    example.mercurial.url.branch = MECURIAL_PATH.get(branch, branch)
                    example.datazilla.url.branch = example.Talos.Branch #+ ("" if worst_in_test.Talos.Branch.pgo else "-Non-PGO")
                    example.datazilla.url.x86 = "true" if example.Talos.Platform == "x86" else "false"
                    example.datazilla.url.x86_64 = "true" if example.Talos.Platform == "x86_64" else "false"
                    example.datazilla.url.stop = nvl(example.push_date_max, (2*example.push_date) - example.push_date_min)

                    num_except = len(exceptions)
                    if num_except == 0:
                        continue

                    part = {
                        "test": g.details.Talos.Test,
                        "num_exceptions": num_except,
                        "num_tests": total_tests,
                        "confidence": worst_in_test.confidence,
                        "example": example
                    }
                    parts.append(part)

                parts = Q.sort(parts, [{"field": "confidence", "sort": -1}])
                worst_in_revision = parts[0].example

                known_alerts.append({
                    "status": "new",
                    "create_time": CNV.milli2datetime(worst_in_revision.push_date),
                    "reason": REASON,
                    "revision": revision,
                    "tdad_id": revision,
                    "details": {
                        "revision": revision,
                        "total_tests": total_tests,
                        "total_exceptions": len(total_exceptions),
                        "tests": parts,
                        "example": worst_in_revision
                    },
                    "severity": SEVERITY,
                    "confidence": worst_in_revision.result.confidence
                })

            known_alerts = Q.unique_index(known_alerts, "revision")

            #NEW ALERTS, JUST INSERT
            new_alerts = known_alerts - old_alerts
            if new_alerts:
                for revision in new_alerts:
                    revision.id = SQL("util.newid()")
                    revision.last_updated = NOW
                db.insert_list("alerts", new_alerts)

            #SHOW SUSTAINED ALERTS ARE COVERED
            db.execute("""
                INSERT INTO hierarchy (parent, child)
                SELECT
                    r.id parent,
                    p.id child
                FROM
                    alerts p
                LEFT JOIN
                    hierarchy h on h.child=p.id
                LEFT JOIN
                    alerts r on r.revision=p.revision AND r.reason={{parent_reason}}
                WHERE
                    {{where}}
            """, {
                "where": esfilter2sqlwhere(db, {"and": [
                    {"term": {"p.reason": talos_sustained_median.REASON}},
                    {"terms": {"p.revision": Q.select(existing_sustained_alerts, "revision")}},
                    {"missing": "h.parent"}
                ]}),
                "parent_reason": REASON
            })

            #CURRENT ALERTS, UPDATE IF DIFFERENT
            for known_alert in known_alerts & old_alerts:
                if len(nvl(known_alert.solution, "").strip()) != 0:
                    continue  # DO NOT TOUCH SOLVED ALERTS

                old_alert = old_alerts[known_alert]
                if old_alert.status == 'obsolete' or significant_difference(known_alert.severity, old_alert.severity) or significant_difference(known_alert.confidence, old_alert.confidence):
                    known_alert.last_updated = NOW
                    db.update("alerts", {"id": old_alert.id}, known_alert)

            #OLD ALERTS, OBSOLETE
            for old_alert in old_alerts - known_alerts:
                if old_alert.status == 'obsolete':
                    continue
                db.update("alerts", {"id": old_alert.id}, {"status": "obsolete", "last_updated": NOW, "details":None})
    def setup(self, to_list):
        self.uid = self.db.query("SELECT util.newid() uid FROM DUAL")[0].uid

        #CLEAR EMAILS
        self.db.execute("DELETE FROM mail.delivery")
        self.db.execute("DELETE FROM mail.attachment")
        self.db.execute("DELETE FROM mail.content")

        #TEST NUMBER OF LISTENERS IN listeners TABLE
        self.db.execute("DELETE FROM listeners")
        for l in to_list:
            self.db.insert("listeners", {"email": l})


        #MAKE A REASON FOR USE IN THIS TESTING
        self.db.execute("DELETE FROM alerts WHERE reason={{reason}}", {"reason": self.reason})
        self.db.execute("DELETE FROM reasons WHERE code={{reason}}", {"reason": self.reason})
        self.db.insert("reasons", {
            "code": self.reason,
            "description": ">>>>{{id}}<<<<", #SPECIAL PATTERN TO DISTINGUISH BETWEEN RESULTING MAILS
            "config": None,
            "last_run": self.now - timedelta(days=1)
        })


        #MAKE SOME TEST DATA (AND GET ID)
        all_dim = struct.wrap({
            "header":
                ("id", "test_run_id", "product_id", "operating_system_id", "test_id", "page_id", "date_received", "revision", "product", "branch", "branch_version", "operating_system_name",
                 "operating_system_version", "processor", "build_type", "machine_name", "pushlog_id", "push_date", "test_name", "page_url", "mean", "std", "h0_rejected", "p", "n_replicates", "fdr",
                 "trend_mean", "trend_std", "test_evaluation", "status"),
            "data": [
                (0, 117679, 65, 20, 64, 860, 1366261267, "d6b34be6fb4c", "Firefox", "Mozilla-Inbound", "23.0a1", "win", "6.2.9200", "x86_64", "opt", "t-w864-ix-022", "19801727", "1366245741", "tp5o",
                 "bbc.co.uk", 138.8, 40.5257120028, 0, 0.650194865224, 25, 0, 144.37333333365, 12.96130778322, 1, 1)
            ]})
        self.db.insert_list("test_data_all_dimensions", CNV.table2list(all_dim.header, all_dim.data))
        self.series = self.db.query("SELECT min(id) id FROM test_data_all_dimensions")[0].id


        # WE INJECT THE EXPECTED TEST RESULTS RIGHT INTO THE DETAILS, THAT WAY
        # WE CAN SEE THEM IN THE EMAIL DELIVERED
        test_data = struct.wrap({
            "header":
                ("id", "status", "create_time", "last_updated", "last_sent", "tdad_id", "reason", "details", "severity", "confidence", "solution"),
            "data": [
                #TEST last_sent IS NOT TOO YOUNG
                (self.uid + 0, "new", self.far_past, self.far_past, self.recent_past, self.series, self.reason, CNV.object2JSON({"id": 0, "expect": "fail"}), self.high_severity, self.high_confidence,
                 None),
                #TEST last_sent IS TOO OLD, SHOULD BE (RE)SENT
                (self.uid + 1, "new", self.far_past, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 1, "expect": "pass"}), self.high_severity, self.high_confidence, None),
                (self.uid + 2, "new", self.far_past, self.now, self.far_past, self.series, self.reason, CNV.object2JSON({"id": 2, "expect": "pass"}), self.high_severity, self.high_confidence, None),
                (self.uid + 3, "new", self.now, self.now, self.recent_past, self.series, self.reason, CNV.object2JSON({"id": 3, "expect": "pass"}), self.high_severity, self.high_confidence, None),
                #TEST obsolete ARE NOT SENT
                (self.uid + 4, "obsolete", self.now, self.now, self.far_past, self.series, self.reason, CNV.object2JSON({"id": 4, "expect": "fail"}), self.high_severity, self.high_confidence, None),
                #TEST ONLY IMPORTANT ARE SENT
                (self.uid + 5, "new", self.now, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 5, "expect": "pass"}), self.important, 0.5, None),
                (self.uid + 6, "new", self.now, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 6, "expect": "fail"}), self.low_severity, self.high_confidence, None),
                (self.uid + 7, "new", self.now, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 7, "expect": "fail"}), self.high_severity, self.low_confidence, None),
                #TEST ONES WITH SOLUTION ARE NOT SENT
                (self.uid + 8, "new", self.now, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 8, "expect": "fail"}), self.high_severity, self.high_confidence, "a solution!")
            ]
        })

        self.test_data = CNV.table2list(test_data.header, test_data.data)
        self.db.insert_list("alerts", self.test_data)