def _es_terms2(es, mvel, query):
    """
    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value
    """

    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = struct.listwrap(query.select)
    esQuery = buildESQuery(query)
    for s in select:
        for i, v in enumerate(values1):
            esQuery.facets[s.name + "," + str(i)] = {
                "terms": {
                    "field": query.edges[1].value,
                    "size": nvl(query.limit, 200000)
                },
                "facet_filter": simplify({"and": [
                    query.where,
                    {"term": {query.edges[0].value: v}}
                ]})
            }

    data = es_query_util.post(es, esQuery, query.limit)

    # UNION ALL TERMS FROM SECOND DIMENSION
    values2 = set()
    for k, f in data.facets.items():
        values2.update(f.terms.term)
    values2 = Q.sort(values2)
    term2index = {v: i for i, v in enumerate(values2)}
    query.edges[1].domain.partitions = StructList([{"name": v, "value": v} for v in values2])

    # MAKE CUBE
    output = {}
    dims = [len(values1), len(values2)]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        coord = facetName.split(",")
        s = [s for s in select if s.name == coord[0]][0]
        i1 = int(coord[1])
        for term in facet.terms:
            i2 = term2index[term.term]
            output[s.name][(i1, i2)] = term[aggregates[s.aggregate]]

    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube
def alert_sustained_median(settings, qb, alerts_db):
    """
    find single points that deviate from the trend
    """
    # OBJECTSTORE = settings.objectstore.schema + ".objectstore"
    # TDAD = settings.perftest.schema + ".test_data_all_dimensions"
    TDAD = settings.query["from"]
    PUSH_DATE = "datazilla.date_loaded"

    debug = nvl(settings.param.debug, DEBUG)
    query = settings.query

    def is_bad(r):
        if settings.param.sustained_median.trigger < r.result.confidence:
            test_param = nvl(settings.param.test[literal_field(r.Talos.Test.name)], settings.param.suite[literal_field(r.Talos.Test.suite)])

            if test_param == None:
                return True

            if test_param.disable:
                return False

            if test_param.better == "higher":
                diff = -r.diff
            elif test_param.better == "lower":
                diff = r.diff
            else:
                diff = abs(r.diff)  # DEFAULT = ANY DIRECTION IS BAD

            if test_param.min_regression:
                if unicode(test_param.min_regression.strip()[-1]) == "%":
                    min_diff = Math.abs(r.past_stats.mean * float(test_param.min_regression.strip()[:-1]) / 100.0)
                else:
                    min_diff = Math.abs(float(test_param.min_regression))
            else:
                min_diff = Math.abs(r.past_stats.mean * 0.01)

            if diff > min_diff:
                return True

        return False

    with Timer("pull combinations"):
        disabled_suites = [s for s, p in settings.param.suite.items() if p.disable]
        disabled_tests = [t for t, p in settings.param.test.items() if p.disable]

        temp = Query({
            "from": TDAD,
            "select": {"name": "min_push_date", "value": PUSH_DATE, "aggregate": "min"},
            "edges": query.edges,
            "where": {"and": [
                True if settings.args.restart else {"missing": {"field": settings.param.mark_complete}},
                {"exists": {"field": "result.test_name"}},
                {"range": {PUSH_DATE: {"gte": OLDEST_TS}}},
                {"not": {"terms": {"Talos.Test.suite": disabled_suites}}},
                {"not": {"terms": {"Talos.Test.name": disabled_tests}}}
                # {"term": {"testrun.suite": "cart"}},
                # {"term": {"result.test_name": "1-customize-enter.error.TART"}},
                # {"term": {"test_machine.osversion": "OS X 10.8"}}
                #FOR DEBUGGING SPECIFIC SERIES
                # {"term": {"test_machine.type": "hamachi"}},
                # {"term": {"test_machine.platform": "Gonk"}},
                # {"term": {"test_machine.os": "Firefox OS"}},
                # {"term": {"test_build.branch": "master"}},
                # {"term": {"testrun.suite": "communications/ftu"}},
                # {"term": {"result.test_name": "startup_time"}}
            ]},
            "limit": nvl(settings.param.combo_limit, 1000)
        }, qb)

        new_test_points = qb.query(temp)

    #BRING IN ALL NEEDED DATA
    if debug:
        Log.note("Pull all data for {{num}} groups:\n{{groups.name}}", {
            "num": len(new_test_points),
            "groups": query.edges
        })

    # all_min_date = Null
    all_touched = set()
    evaled_tests = set()
    alerts = []   # PUT ALL THE EXCEPTION ITEMS HERE
    for g, test_points in Q.groupby(new_test_points, query.edges):
        if not test_points.min_push_date:
            continue
        try:
            if settings.args.restart:
                first_sample = OLDEST_TS
            else:
                first_sample = MAX(MIN(test_points.min_push_date), OLDEST_TS)
            # FOR THIS g, HOW FAR BACK IN TIME MUST WE GO TO COVER OUR WINDOW_SIZE?
            first_in_window = qb.query({
                "select": {"name": "min_date", "value": "push_date", "aggregate": "min"},
                "from": {
                    "from": TDAD,
                    "select": {"name": "push_date", "value": PUSH_DATE},
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"lt": first_sample}}}
                    ]},
                    "sort": {"field": PUSH_DATE, "sort": -1},
                    "limit": settings.param.sustained_median.window_size * 2
                }
            })
            if len(first_in_window) > settings.param.sustained_median.window_size * 2:
                do_all = False
            else:
                do_all = True

            min_date = MIN(first_sample, first_in_window.min_date)

            #LOAD TEST RESULTS FROM DATABASE
            test_results = qb.query({
                "from": {
                    "from": "talos",
                    "select": [{"name": "push_date", "value": PUSH_DATE}] +
                              query.select +
                              query.edges,
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"gte": min_date}}}
                    ]},
                },
                "sort": "push_date"
            })

            Log.note("{{num}} test results found for {{group}} dating back no further than {{start_date}}", {
                "num": len(test_results),
                "group": g,
                "start_date": CNV.milli2datetime(min_date)
            })

            if debug:
                Log.note("Find sustained_median exceptions")

            #APPLY WINDOW FUNCTIONS
            stats = Q.run({
                "from": {
                    "from": test_results,
                    "where": {"exists": {"field": "value"}}
                },
                "window": [
                    {
                        # WE DO NOT WANT TO CONSIDER THE POINTS BEFORE FULL WINDOW SIZE
                        "name": "ignored",
                        "value": lambda r, i: False if do_all or i > settings.param.sustained_median.window_size else True
                    }, {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_min",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Min,
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    }, {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_max",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Max,
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    }, {
                        "name": "past_revision",
                        "value": lambda r, i, rows: rows[i - 1].Talos.Revision,
                        "sort": "push_date"
                    }, {
                        "name": "past_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    }, {
                        "name": "future_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    }, {
                        "name": "result",
                        "value": lambda r, i, rows: median_test(
                            rows[-settings.param.sustained_median.window_size + i:i:].value,
                            rows[i:settings.param.sustained_median.window_size + i:].value,
                            interpolate=False
                        ),
                        "sort": "push_date"
                    }, {
                        "name": "diff",
                        "value": lambda r: r.future_stats.mean - r.past_stats.mean
                    }, {
                        "name": "diff_percent",
                        "value": lambda r: (r.future_stats.mean - r.past_stats.mean) / r.past_stats.mean
                    }, {
                        "name": "is_diff",
                        "value": is_bad
                    }, {
                        #USE THIS TO FILL CONFIDENCE HOLES
                        #WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "future_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": "push_date"
                    }, {
                        #WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "past_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": {"value": "push_date", "sort": -1}
                    },
                ]
            })

            #PICK THE BEST SCORE FOR EACH is_diff==True REGION
            for g2, data in Q.groupby(stats, "is_diff", contiguous=True):
                if g2.is_diff:
                    best = Q.sort(data, ["result.confidence", "diff"]).last()
                    best["pass"] = True

            all_touched.update(Q.select(test_results, ["test_run_id", "Talos.Test"]))

            # TESTS THAT HAVE BEEN (RE)EVALUATED GIVEN THE NEW INFORMATION
            evaled_tests.update(Q.run({
                "from": test_results,
                "select": ["test_run_id", "Talos.Test"],
                "where": {"term": {"ignored": False}}
            }))

            File("test_values.txt").write(CNV.list2tab(Q.select(stats, [
                {"name": "push_date", "value": lambda x: CNV.datetime2string(CNV.milli2datetime(x.push_date), "%d-%b-%Y %H:%M:%S")},
                "value",
                {"name": "revision", "value": "Talos.Revision"},
                {"name": "confidence", "value": "result.confidence"},
                "pass"
            ])))

            #TESTS THAT HAVE SHOWN THEMSELVES TO BE EXCEPTIONAL
            new_exceptions = Q.filter(stats, {"term": {"pass": True}})
            for v in new_exceptions:
                if v.ignored:
                    continue
                alert = Struct(
                    status="new",
                    create_time=CNV.milli2datetime(v.push_date),
                    tdad_id={"test_run_id": v.test_run_id, "Talos": {"Test": v.Talos.Test}},
                    reason=REASON,
                    revision=v.Talos.Revision,
                    details=v,
                    severity=SEVERITY,
                    confidence=v.result.confidence
                )
                alerts.append(alert)

            if debug:
                Log.note("{{num}} new exceptions found", {"num": len(new_exceptions)})

        except Exception, e:
            Log.warning("Problem with alert identification, continue to log existing alerts and stop cleanly", e)
def talos_alert_revision(settings):
    assert settings.alerts != None
    settings.db.debug = settings.param.debug
    with DB(settings.alerts) as db:
        with ESQuery(ElasticSearch(settings.query["from"])) as esq:

            dbq = DBQuery(db)
            esq.addDimension(CNV.JSON2object(File(settings.dimension.filename).read()))

            #TODO: REMOVE, LEAVE IN DB
            if db.debug:
                db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
                    "template": CNV.object2JSON(TEMPLATE),
                    "subject": CNV.object2JSON(SUBJECT),
                    "reason": REASON
                })
                db.flush()

            #EXISTING SUSTAINED EXCEPTIONS
            existing_sustained_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": talos_sustained_median.REASON}},
                    {"not": {"term": {"status": "obsolete"}}},
                    {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                ]}
            })

            tests = Q.index(existing_sustained_alerts, ["revision", "details.Talos.Test"])

            #EXISTING REVISION-LEVEL ALERTS
            old_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": REASON}},
                    {"or": [
                        {"terms": {"revision": set(existing_sustained_alerts.revision)}},

                        {"term": {"reason": talos_sustained_median.REASON}},
                        {"term": {"status": "obsolete"}},
                        {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                    ]}
                ]}
            })
            old_alerts = Q.unique_index(old_alerts, "revision")

            #SUMMARIZE
            known_alerts = StructList()
            for revision in set(existing_sustained_alerts.revision):
            #FIND TOTAL TDAD FOR EACH INTERESTING REVISION
                total_tests = esq.query({
                    "from": "talos",
                    "select": {"name": "count", "aggregate": "count"},
                    "where": {"and":[
                        {"terms": {"Talos.Revision": revision}}
                    ]}
                })
                total_exceptions = tests[(revision, )]  # FILTER BY revision

                parts = StructList()
                for g, exceptions in Q.groupby(total_exceptions, ["details.Talos.Test"]):
                    worst_in_test = Q.sort(exceptions, ["confidence", "details.diff_percent"]).last()
                    example = worst_in_test.details
                    # ADD SOME DATAZILLA SPECIFIC URL PARAMETERS
                    branch = example.Talos.Branch.replace("-Non-PGO", "")
                    example.tbpl.url.branch = TBPL_PATH.get(branch, branch)
                    example.mercurial.url.branch = MECURIAL_PATH.get(branch, branch)
                    example.datazilla.url.branch = example.Talos.Branch #+ ("" if worst_in_test.Talos.Branch.pgo else "-Non-PGO")
                    example.datazilla.url.x86 = "true" if example.Talos.Platform == "x86" else "false"
                    example.datazilla.url.x86_64 = "true" if example.Talos.Platform == "x86_64" else "false"
                    example.datazilla.url.stop = nvl(example.push_date_max, (2*example.push_date) - example.push_date_min)

                    num_except = len(exceptions)
                    if num_except == 0:
                        continue

                    part = {
                        "test": g.details.Talos.Test,
                        "num_exceptions": num_except,
                        "num_tests": total_tests,
                        "confidence": worst_in_test.confidence,
                        "example": example
                    }
                    parts.append(part)

                parts = Q.sort(parts, [{"field": "confidence", "sort": -1}])
                worst_in_revision = parts[0].example

                known_alerts.append({
                    "status": "new",
                    "create_time": CNV.milli2datetime(worst_in_revision.push_date),
                    "reason": REASON,
                    "revision": revision,
                    "tdad_id": revision,
                    "details": {
                        "revision": revision,
                        "total_tests": total_tests,
                        "total_exceptions": len(total_exceptions),
                        "tests": parts,
                        "example": worst_in_revision
                    },
                    "severity": SEVERITY,
                    "confidence": worst_in_revision.result.confidence
                })

            known_alerts = Q.unique_index(known_alerts, "revision")

            #NEW ALERTS, JUST INSERT
            new_alerts = known_alerts - old_alerts
            if new_alerts:
                for revision in new_alerts:
                    revision.id = SQL("util.newid()")
                    revision.last_updated = NOW
                db.insert_list("alerts", new_alerts)

            #SHOW SUSTAINED ALERTS ARE COVERED
            db.execute("""
                INSERT INTO hierarchy (parent, child)
                SELECT
                    r.id parent,
                    p.id child
                FROM
                    alerts p
                LEFT JOIN
                    hierarchy h on h.child=p.id
                LEFT JOIN
                    alerts r on r.revision=p.revision AND r.reason={{parent_reason}}
                WHERE
                    {{where}}
            """, {
                "where": esfilter2sqlwhere(db, {"and": [
                    {"term": {"p.reason": talos_sustained_median.REASON}},
                    {"terms": {"p.revision": Q.select(existing_sustained_alerts, "revision")}},
                    {"missing": "h.parent"}
                ]}),
                "parent_reason": REASON
            })

            #CURRENT ALERTS, UPDATE IF DIFFERENT
            for known_alert in known_alerts & old_alerts:
                if len(nvl(known_alert.solution, "").strip()) != 0:
                    continue  # DO NOT TOUCH SOLVED ALERTS

                old_alert = old_alerts[known_alert]
                if old_alert.status == 'obsolete' or significant_difference(known_alert.severity, old_alert.severity) or significant_difference(known_alert.confidence, old_alert.confidence):
                    known_alert.last_updated = NOW
                    db.update("alerts", {"id": old_alert.id}, known_alert)

            #OLD ALERTS, OBSOLETE
            for old_alert in old_alerts - known_alerts:
                if old_alert.status == 'obsolete':
                    continue
                db.update("alerts", {"id": old_alert.id}, {"status": "obsolete", "last_updated": NOW, "details":None})