Example #1
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        self.type = "range"
        self.NULL = Null

        if self.partitions:
            # IGNORE THE min, max, interval
            if not self.key:
                Log.error("Must have a key value")

            parts = listwrap(self.partitions)
            for i, p in enumerate(parts):
                self.min = Math.min(self.min, p.min)
                self.max = Math.max(self.max, p.max)
                if p.dataIndex != None and p.dataIndex != i:
                    Log.error("Expecting `dataIndex` to agree with the order of the parts")
                if p[self.key] == None:
                    Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
                p.dataIndex = i

            # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
            for p, q in itertools.product(parts, parts):
                if p.min <= q.min and q.min < p.max:
                    Log.error("partitions overlap!")

            self.partitions = parts
            return
        elif any([self.min == None, self.max == None, self.interval == None]):
            Log.error("Can not handle missing parameter")

        self.key = "min"
        self.partitions = wrap([{"min": v, "max": v + self.interval, "dataIndex": i} for i, v in enumerate(frange(self.min, self.max, self.interval))])
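
The constructor above folds each partition's bounds into self.min and self.max with Math.min and Math.max, which appear to tolerate a missing value on the first pass. A minimal sketch of a None-tolerant minimum in that spirit; none_min is a hypothetical helper, not this library's API:

def none_min(a, b):
    # Treat None as "no value yet" so the first partition seeds the bound.
    if a is None:
        return b
    if b is None:
        return a
    return min(a, b)

assert none_min(None, 5) == 5  # first value seeds the minimum
assert none_min(3, 5) == 3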
Example #2
 def __init__(self, edge, query, limit):
     AggsDecoder.__init__(self, edge, query, limit)
     self.domain = edge.domain
     self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
     self.parts = list()
     self.key2index = {}
     self.computed_domain = False
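
The decoder clamps its domain limit: take the first limit that was actually given (the domain's, then the query's, then a default of 10) and cap it at MAX_LIMIT. A plain-Python sketch of the same pattern, assuming coalesce() returns its first non-None argument; clamp_limit and the MAX_LIMIT value below are illustrative, not the library's API:

MAX_LIMIT = 10000  # illustrative cap, not necessarily the library's value

def clamp_limit(domain_limit, query_limit, default=10, cap=MAX_LIMIT):
    # coalesce: the first value that is not None
    requested = next(v for v in (domain_limit, query_limit, default) if v is not None)
    return min(requested, cap)

assert clamp_limit(None, 50) == 50           # falls back to the query limit
assert clamp_limit(None, None) == 10         # falls back to the default
assert clamp_limit(999999, 50) == MAX_LIMIT  # capped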
Example #3
def accumulate_logs(source_key, file_name, lines, please_stop):
    accumulator = LogSummary()
    for line in lines:
        if please_stop:
            Log.error(
                "Shutdown detected.  Structured log iterator is stopped.")
        accumulator.stats.bytes += len(
            line
        ) + 1  # INCLUDE THE \n THAT WOULD HAVE BEEN AT END OF EACH LINE
        line = strings.strip(line)

        if line == "":
            continue
        try:
            accumulator.stats.lines += 1
            log = convert.json2value(line)
            log.time = log.time / 1000
            accumulator.stats.start_time = Math.min(
                accumulator.stats.start_time, log.time)
            accumulator.stats.end_time = Math.max(accumulator.stats.end_time,
                                                  log.time)

            # FIX log.test TO BE A STRING
            if isinstance(log.test, list):
                log.test = " ".join(log.test)

            accumulator.__getattribute__(log.action)(log)
            if log.subtest:
                accumulator.last_subtest = log.time
        except Exception, e:
            accumulator.stats.bad_lines += 1
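
accumulate_logs keeps a running start_time and end_time with Math.min and Math.max as it streams lines. The same running-bounds pattern in plain Python, assuming those helpers skip a missing value so the first record seeds the bounds (illustrative names only):

def summarize_times(timestamps):
    start_time = end_time = None
    for t in timestamps:
        start_time = t if start_time is None else min(start_time, t)
        end_time = t if end_time is None else max(end_time, t)
    return start_time, end_time

assert summarize_times([12.0, 3.5, 9.0]) == (3.5, 12.0)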
Example #4
 def __init__(self, edge, query):
     AggsDecoder.__init__(self, edge, query)
     self.fields = edge.domain.dimension.fields
     self.domain = self.edge.domain
     self.domain.limit = Math.min(
         coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
     self.parts = list()
Example #5
    def wrap(query, schema=None):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if isinstance(query, QueryOp) or query == None:
            return query

        query = wrap(query)

        output = QueryOp("from", None)
        output.format = query.format
        output.frum = wrap_from(query["from"], schema=schema)
        if not schema and isinstance(output.frum, Schema):
            schema = output.frum

        if query.select:
            output.select = _normalize_selects(query.select,
                                               query.frum,
                                               schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = Dict(name="count",
                                     value=jx_expression("."),
                                     aggregate="count",
                                     default=0)
            else:
                output.select = _normalize_selects(".", query["from"])

        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = _normalize_edges(query.edges, schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby, schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where(query.where, schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        output.limit = Math.min(MAX_LIMIT, coalesce(query.limit,
                                                    DEFAULT_LIMIT))
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
Example #6
def main():
    settings = startup.read_settings(defs={
       "name": ["--restart", "--reset", "--redo"],
       "help": "force a reprocessing of all data",
       "action": "store_true",
       "dest": "restart"
    })
    Log.start(settings.debug)

    try:
        with startup.SingleInstance(flavor_id=settings.args.filename):
            if settings.args.restart:
                reviews = Cluster(settings.destination).create_index(settings.destination)
            else:
                reviews = Cluster(settings.destination).get_proto(settings.destination)

            bugs = Cluster(settings.source).get_index(settings.source)

            with FromES(bugs) as esq:
                es_max_bug = esq.query({
                    "from": "private_bugs",
                    "select": {"name": "max_bug", "value": "bug_id", "aggregate": "maximum"}
                })

            #PROBE WHAT RANGE OF BUGS IS LEFT TO DO (IN EVENT OF FAILURE)
            with FromES(reviews) as esq:
                es_min_bug = esq.query({
                    "from": "reviews",
                    "select": {"name": "min_bug", "value": "bug_id", "aggregate": "minimum"}
                })

            batch_size = coalesce(bugs.settings.batch_size, settings.size, 1000)
            threads = coalesce(settings.threads, 4)
            Log.note(str(settings.min_bug))
            min_bug = int(coalesce(settings.min_bug, 0))
            max_bug = int(coalesce(settings.max_bug, Math.min(es_min_bug + batch_size * threads, es_max_bug)))

            with ThreadedQueue(reviews, batch_size=coalesce(reviews.settings.batch_size, 100)) as sink:
                func = functools.partial(full_etl, settings, sink)
                with Multithread(func, threads=threads) as m:
                    m.inbound.silent = True
                    Log.note("bugs from {{min}} to {{max}}, step {{step}}", {
                        "min": min_bug,
                        "max": max_bug,
                        "step": batch_size
                    })
                    m.execute(reversed([{"bugs": range(s, e)} for s, e in qb.intervals(min_bug, max_bug, size=1000)]))

            if settings.args.restart:
                reviews.add_alias()
                reviews.delete_all_but_self()
    finally:
        Log.stop()
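
The ETL driver above caps max_bug at Math.min(es_min_bug + batch_size * threads, es_max_bug) and hands fixed-size bug ranges to the worker pool. A sketch of that slicing with a hypothetical stand-in for qb.intervals (whose exact behavior is not shown here): split [lo, hi) into consecutive [start, end) pairs of at most size items.

def intervals(lo, hi, size):
    # Consecutive [start, end) pairs covering [lo, hi), each at most `size` wide.
    return [(s, min(s + size, hi)) for s in range(lo, hi, size)]

assert intervals(0, 2500, 1000) == [(0, 1000), (1000, 2000), (2000, 2500)]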
Example #7
 def __getslice__(self, i, j):
     j = Math.min(j, len(self))
     if j - i > 2**28:
         Log.error("Slice of {{num}} bytes is too big", num=j - i)
     try:
         self.file.seek(i)
         output = self.file.read(j - i).decode(self.encoding)
         return output
     except Exception, e:
         Log.error(
             "Can not read file slice at {{index}}, with encoding {{encoding}}",
             index=i,
             encoding=self.encoding,
             cause=e)
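
__getslice__ clamps the requested end with Math.min and refuses slices larger than 2**28 bytes before seeking into the file. The same clamp-and-guard pattern on a plain in-memory buffer, as an illustration only (the real class decodes from a file with a configured encoding):

MAX_SLICE = 2 ** 28  # the same 256 MiB guard as above

def safe_slice(data, i, j):
    j = min(j, len(data))  # clamp the end, as Math.min does above
    if j - i > MAX_SLICE:
        raise ValueError("Slice of %d bytes is too big" % (j - i))
    return data[i:j]

assert safe_slice(b"hello world", 0, 100) == b"hello world"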
Example #8
    def wrap(query, schema=None):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if isinstance(query, QueryOp) or query == None:
            return query

        query = wrap(query)

        output = QueryOp("from", None)
        output.format = query.format
        output.frum = wrap_from(query["from"], schema=schema)
        if not schema and isinstance(output.frum, Schema):
            schema = output.frum

        if query.select:
            output.select = _normalize_selects(query.select, query.frum, schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = Dict(name="count", value=jx_expression("."), aggregate="count", default=0)
            else:
                output.select = _normalize_selects(".", query["from"])

        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            output.edges = _normalize_edges(query.edges, schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby, schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where(query.where, schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
Example #9
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        self.type = "range"
        self.NULL = Null

        if self.partitions:
            # IGNORE THE min, max, interval
            if not self.key:
                Log.error("Must have a key value")

            parts = listwrap(self.partitions)
            for i, p in enumerate(parts):
                self.min = Math.min(self.min, p.min)
                self.max = Math.max(self.max, p.max)
                if p.dataIndex != None and p.dataIndex != i:
                    Log.error(
                        "Expecting `dataIndex` to agree with the order of the parts"
                    )
                if p[self.key] == None:
                    Log.error(
                        "Expecting all parts to have {{key}} as a property",
                        key=self.key)
                p.dataIndex = i

            # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
            for p, q in itertools.product(parts, parts):
                if p.min <= q.min and q.min < p.max:
                    Log.error("partitions overlap!")

            self.partitions = parts
            return
        elif any([self.min == None, self.max == None, self.interval == None]):
            Log.error("Can not handle missing parameter")

        self.key = "min"
        self.partitions = wrap([{
            "min": v,
            "max": v + self.interval,
            "dataIndex": i
        } for i, v in enumerate(frange(self.min, self.max, self.interval))])
Example #10
 def __init__(self, edge, query, limit):
     AggsDecoder.__init__(self, edge, query, limit)
     self.fields = edge.domain.dimension.fields
     self.domain = self.edge.domain
     self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
     self.parts = list()
Example #11
    def __init__(self, query, schema=None):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if isinstance(query, Query) or query == None:
            return

        object.__init__(self)
        query = wrap(query)

        self.format = query.format
        self.frum = wrap_from(query["from"], schema=schema)

        select = query.select
        if isinstance(select, list):
            names = set()
            new_select = []
            for s in select:
                ns = _normalize_select(s, schema=schema)
                if ns.name in names:
                    Log.error("two select have the same name")
                names.add(ns.name)
                new_select.append(unwrap(ns))
            self.select = wrap(new_select)
        elif select:
            self.select = _normalize_select(select, schema=schema)
        else:
            if query.edges or query.groupby:
                self.select = Dict(name="count", value=".", aggregate="count")
            else:
                self.select = Dict(name=".", value=".", aggregate="none")

        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            self.edges = _normalize_edges(query.edges, schema=schema)
            self.groupby = None
        elif query.groupby:
            self.edges = None
            self.groupby = _normalize_groupby(query.groupby, schema=schema)
        else:
            self.edges = []
            self.groupby = None

        self.where = _normalize_where(query.where, schema=schema)
        self.window = [_normalize_window(w) for w in listwrap(query.window)]
        self.having = None
        self.sort = _normalize_sort(query.sort)
        self.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
        if not Math.is_integer(self.limit) or self.limit < 0:
            Log.error("Expecting limit >= 0")

        self.isLean = query.isLean


        # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
        # THE from SOURCE IS.
        # TODO: IGNORE REACHING INTO THE NON-NESTED TYPES
        if isinstance(self.frum, list):
            if not qb:
                _late_import()
            columns = qb.get_columns(self.frum)
        elif isinstance(self.frum, Container):
            columns = self.frum.get_columns(table=self.frum.name)
        else:
            columns = []

        query_path = coalesce(self.frum.query_path, ".")
        vars = query_get_all_vars(self, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
        for c in columns:
            if c.name in vars and not query_path.startswith(coalesce(listwrap(c.nested_path)[0], "")):
                Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name)
Example #12
def es_aggsop(es, frum, query):
    select = wrap([s.copy() for s in listwrap(query.select)])
    es_column_map = {c.name: unwraplist(c.es_column) for c in frum.schema.all_columns}

    es_query = Dict()
    new_select = Dict()  #MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            s.pull = "doc_count"
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                if frum.typed:
                    # STATISTICAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        #TODO: HANDLE BOTH $value AND $objects TO COUNT
                        Log.error("do not know how to handle")
                    else:
                        s.value.var = "$value"
                        new_select["$value"] += [s]
                else:
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        #TODO:  WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                        Log.error("do not know how to handle")
                    else:
                        Log.error('Not expecting ES to have a value at "." which {{agg}} can be applied', agg=s.aggregate)
            elif s.aggregate == "count":
                s.value = s.value.map(es_column_map)
                new_select["count_"+literal_field(s.value.var)] += [s]
            else:
                s.value = s.value.map(es_column_map)
                new_select[literal_field(s.value.var)] += [s]
        else:
            formula.append(s)

    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value.var == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value.var

        # canonical_name=literal_field(many[0].name)
        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                if isinstance(s.percentile, basestring) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")

                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            elif s.aggregate == "stats":
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = field_name

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + " percentile")
                es_query.aggs[median_name].percentiles.field = field_name
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = {
                    "count": stats_name + ".count",
                    "sum": stats_name + ".sum",
                    "min": stats_name + ".min",
                    "max": stats_name + ".max",
                    "avg": stats_name + ".avg",
                    "sos": stats_name + ".sum_of_squares",
                    "std": stats_name + ".std_deviation",
                    "var": stats_name + ".variance",
                    "median": median_name + ".values.50\.0"
                }
            elif s.aggregate == "union":
                # USE TERMS AGGREGATE TO SIMULATE union
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].terms.field = field_name
                es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT)
                s.pull = stats_name + ".buckets.key"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)
        abs_value = s.value.map(es_column_map)

        if s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby()

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = abs_value.to_ruby()
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = {
                "count": stats_name + ".count",
                "sum": stats_name + ".sum",
                "min": stats_name + ".min",
                "max": stats_name + ".max",
                "avg": stats_name + ".avg",
                "sos": stats_name + ".sum_of_squares",
                "std": stats_name + ".std_deviation",
                "var": stats_name + ".variance",
                "median": median_name + ".values.50\.0"
            }
        elif s.aggregate=="union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby()
            s.pull = stats_name + ".buckets.key"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby()

    decoders = get_decoders_by_depth(query)
    start = 0

    vars_ = query.where.vars()

    #<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum, map_=es_column_map)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Dict(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {
                    "nested": {
                        "path": frum.query_path
                    }
                },
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    for d in decoders[0]:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if split_where[0]:
        #TODO: INCLUDE FILTERS ON EDGES
        filter = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", e)
Example #13
    def transform(self, id, datazilla):
        try:
            r = datazilla.json_blob

            #ADD DATAZILLA MARKUP
            r.datazilla = {
                "id": id,
                "date_loaded": datazilla.date_loaded * 1000,
                "error_flag": datazilla.error_flag,
                "test_run_id": datazilla.test_run_id,
                "processed_flag": datazilla.processed_flag,
                "error_msg": datazilla.error_msg
            }

            #CONVERT UNIX TIMESTAMP TO MILLISECOND TIMESTAMP
            r.testrun.date *= 1000

            def mainthread_transform(r):
                if r == None:
                    return None

                output = Struct()

                for i in r.mainthread_readbytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readbytes = i[0]
                r.mainthread_readbytes = None

                for i in r.mainthread_writebytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writebytes = i[0]
                r.mainthread_writebytes = None

                for i in r.mainthread_readcount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readcount = i[0]
                r.mainthread_readcount = None

                for i in r.mainthread_writecount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writecount = i[0]
                r.mainthread_writecount = None

                r.mainthread = output.values()

            mainthread_transform(r.results_aux)
            mainthread_transform(r.results_xperf)

            #ADD PUSH LOG INFO
            try:
                branch = r.test_build.branch
                if branch.endswith("-Non-PGO"):
                    r.test_build.branch = branch
                    r.test_build.pgo = False
                    branch = branch[0:-8]
                else:
                    r.test_build.pgo = True

                with Profiler("get from pushlog"):
                    if not self.pushlog:
                        #NO PUSHLOG MEANS WE DO NOTHING TO MARKUP TEST RESULTS
                        pass
                    elif self.pushlog[branch]:
                        possible_dates = self.pushlog[branch][r.test_build.revision]
                        if possible_dates:
                            r.test_build.push_date = int(Math.round(possible_dates[0].date * 1000))
                        else:
                            if r.test_build.revision == 'NULL':
                                r.test_build.no_pushlog = True  # OOPS! SOMETHING BROKE
                            elif CNV.milli2datetime(Math.min(r.testrun.date, r.datazilla.date_loaded)) < PUSHLOG_TOO_OLD:
                                Log.note("{{branch}} @ {{revision}} has no pushlog, transforming anyway", r.test_build)
                                r.test_build.no_pushlog = True
                            else:
                                Log.note("{{branch}} @ {{revision}} has no pushlog, try again later", r.test_build)
                                return []  # TRY AGAIN LATER
                    else:
                        with self.locker:
                            if branch not in self.unknown_branches:
                                Log.note("Whole branch {{branch}} has no pushlog", {"branch":branch})
                                self.unknown_branches.add(branch)
                            if CNV.milli2datetime(Math.min(r.testrun.date, r.datazilla.date_loaded)) < PUSHLOG_TOO_OLD:
                                r.test_build.no_pushlog = True
                            else:
                                r.test_build.no_pushlog = True
                                #return [r]  #TODO: DO THIS IF WE FIGURE OUT HOW TO HANDLE THE VERY LARGE NUMBER OF RESULTS WITH NO PUSHLOG

            except Exception, e:
                Log.warning("{{branch}} @ {{revision}} has no pushlog", r.test_build, e)

            new_records = []

            # RECORD THE UNKNOWN PART OF THE TEST RESULTS
            remainder = r.copy()
            remainder.results = None
            if len(remainder.keys()) > 4:
                new_records.append(remainder)

            #RECORD TEST RESULTS
            total = StructList()
            if r.testrun.suite in ["dromaeo_css", "dromaeo_dom"]:
                #dromaeo IS SPECIAL, REPLICATES ARE IN SETS OF FIVE
                #RECORD ALL RESULTS
                for i, (test_name, replicates) in enumerate(r.results.items()):
                    for g, sub_results in Q.groupby(replicates, size=5):
                        new_record = Struct(
                            test_machine=r.test_machine,
                            datazilla=r.datazilla,
                            testrun=r.testrun,
                            test_build=r.test_build,
                            result={
                                "test_name": unicode(test_name) + "." + unicode(g),
                                "ordering": i,
                                "samples": sub_results
                            }
                        )
                        try:
                            s = stats(sub_results)
                            new_record.result.stats = s
                            total.append(s)
                        except Exception, e:
                            Log.warning("can not reduce series to moments", e)
                        new_records.append(new_record)
Example #14
 def __init__(self, edge, query):
     AggsDecoder.__init__(self, edge, query)
     self.domain = edge.domain
     self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
     self.parts = list()