Example #1
    def __init__(self, **desc):
        Domain.__init__(self, **desc)
        self.type = "range"
        self.NULL = Null

        if self.partitions:
            # IGNORE THE min, max, interval
            if not self.key:
                Log.error("Must have a key value")

            parts = listwrap(self.partitions)
            for i, p in enumerate(parts):
                self.min = Math.min(self.min, p.min)
                self.max = Math.max(self.max, p.max)
                if p.dataIndex != None and p.dataIndex != i:
                    Log.error("Expecting `dataIndex` to agree with the order of the parts")
                if p[self.key] == None:
                    Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
                p.dataIndex = i

            # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
            for p, q in itertools.product(parts, parts):
                if p is not q and p.min <= q.min and q.min < p.max:
                    Log.error("partitions overlap!")

            self.partitions = parts
            return
        elif any([self.min == None, self.max == None, self.interval == None]):
            Log.error("Can not handle missing parameter")

        self.key = "min"
        self.partitions = wrap([{"min": v, "max": v + self.interval, "dataIndex": i} for i, v in enumerate(frange(self.min, self.max, self.interval))])
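The fallback branch builds uniform [min, max) buckets with `frange`, a float-stepping range helper from the same library. A minimal sketch of what such a helper is assumed to do (illustrative, not the library's actual implementation):

def frange(start, stop, step):
    # ASSUMPTION: yields start, start + step, ... strictly below stop,
    # matching how the comprehension builds {"min": v, "max": v + interval} parts
    v = start
    while v < stop:
        yield v
        v += step

list(frange(0, 10, 2.5))  # [0, 2.5, 5.0, 7.5]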
Example #2
def accumulate_logs(source_key, file_name, lines, please_stop):
    accumulator = LogSummary()
    for line in lines:
        if please_stop:
            Log.error("Shutdown detected. Structured log iterator is stopped.")
        accumulator.stats.bytes += len(line) + 1  # INCLUDE THE \n THAT WOULD HAVE BEEN AT END OF EACH LINE
        line = strings.strip(line)

        if line == "":
            continue
        try:
            accumulator.stats.lines += 1
            log = convert.json2value(line)
            log.time = log.time / 1000
            accumulator.stats.start_time = Math.min(accumulator.stats.start_time, log.time)
            accumulator.stats.end_time = Math.max(accumulator.stats.end_time, log.time)

            # FIX log.test TO BE A STRING
            if isinstance(log.test, list):
                log.test = " ".join(log.test)

            getattr(accumulator, log.action)(log)  # DISPATCH TO THE HANDLER NAMED BY log.action
            if log.subtest:
                accumulator.last_subtest = log.time
        except Exception:
            accumulator.stats.bad_lines += 1
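This accumulator relies on `Math.min` and `Math.max` tolerating a missing accumulated value, so `stats.start_time` can begin unset. A sketch of that assumed null-skipping behavior (illustrative names, not the library's code):

def nullsafe_min(*values):
    # ASSUMPTION: like Math.min above, missing values are ignored rather than raising
    found = [v for v in values if v is not None]
    return min(found) if found else None

def nullsafe_max(*values):
    found = [v for v in values if v is not None]
    return max(found) if found else None

assert nullsafe_min(None, 5, 2) == 2
assert nullsafe_max(None, None) is None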
Example #3
def copy2es(es, settings, work_queue, please_stop=None):
    # COPY EVERYTHING FROM THE S3 BUCKET INTO ELASTICSEARCH
    bucket = s3.Bucket(settings.source)

    for key in iter(work_queue.pop, ""):
        if please_stop:
            return
        if key == None:
            continue

        key = unicode(key)
        extend_time = Timer("insert", silent=True)
        Log.note("Indexing {{key}}", key=key)
        with extend_time:
            if settings.sample_only:
                sample_filter = {
                    "terms": {
                        "build.branch": settings.sample_only
                    }
                }
            elif settings.sample_size:
                sample_filter = True
            else:
                sample_filter = None

            if ":" in key:
                more_keys = bucket.keys(prefix=key)
            else:
                more_keys = bucket.keys(prefix=key + ":")
            num_keys = es.copy(more_keys, bucket, sample_filter, settings.sample_size)

        if num_keys > 1:
            Log.note(
                "Added {{num}} keys from {{key}} block in {{duration}} ({{rate|round(places=3)}} keys/second)",
                num=num_keys,
                key=key,
                duration=extend_time.duration,
                rate=num_keys / Math.max(extend_time.duration.seconds, 0.01))

        work_queue.commit()
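The consumer loop above uses Python's two-argument `iter(callable, sentinel)` form: `work_queue.pop` is called repeatedly until it returns the empty-string sentinel. A self-contained illustration of the same pattern:

from collections import deque

queue = deque(["key:1", "key:2", ""])  # "" acts as the shutdown sentinel
for key in iter(queue.popleft, ""):
    print(key)  # prints key:1, then key:2, then the loop ends at the sentinel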
Example #4
    def add(self, value):
        self.max = Math.max(self.max, value)
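For context, a hypothetical host class for this method: a running-maximum accumulator where `Math.max` is assumed to ignore an unset starting value:

class MaxAccumulator(object):
    # HYPOTHETICAL host class for the add() method above
    def __init__(self):
        self.max = None  # no observations yet

    def add(self, value):
        # assuming Math.max semantics: an unset accumulator is ignored
        self.max = value if self.max is None else max(self.max, value)

acc = MaxAccumulator()
for v in [3, 9, 4]:
    acc.add(v)
assert acc.max == 9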
Example #5
def full_etl(settings, sink, bugs):
    with Timer("process block {{start}}", {"start": min(bugs)}):
        es = elasticsearch.Index(settings.source)
        with FromES(es) as esq:
            versions = esq.query({
                "from": "bugs",
                "select": "*",
                "where": {"terms": {"bug_id": bugs}}
            })

        starts = qb.run({
            "select": [
                "bug_id",
                "bug_status",
                {"name": "attach_id", "value": "attachments.attach_id"},
                {"name": "request_time", "value": "modified_ts"},
                {"name": "request_type", "value": "attachments.flags.request_type"},
                {"name": "reviewer", "value": "attachments.flags.requestee"},
                {"name": "created_by", "value": "attachments.created_by"},
                "product",
                "component"
            ],
            "from":
                versions,
            "where":
                {"and": [
                    {"terms": {"attachments.flags.request_status": ["?"]}},
                    {"terms": {"attachments.flags.request_type": TYPES}},
                    {"equal": ["attachments.flags.modified_ts", "modified_ts"]},
                    {"term": {"attachments.isobsolete": 0}}
                ]},
            "sort": ["bug_id", "attach_id", "created_by"]
        })

        ends = qb.run({
            "select": [
                {"name": "bug_id", "value": "bug_id"},
                "bug_status",
                {"name": "attach_id", "value": "attachments.attach_id"},
                {"name": "modified_ts", "value": lambda r: Math.max(r.modified_ts, r.attachments.modified_ts, r.attachments.flags.modified_ts)},
                {"name": "reviewer", "value": "attachments.flags.requestee"},
                {"name": "request_type", "value": "attachments.flags.request_type"},
                {"name": "modified_by", "value": "attachments.flags.modified_by"},
                {"name": "product", "value": "product"},
                {"name": "component", "value": "component"},
                {"name": "review_end_reason", "value": lambda r: 'done' if r.attachments.flags.request_status != '?' else ('obsolete' if r.attachments.isobsolete == 1 else 'closed')},
                {"name": "review_result", "value": lambda r: '+' if r.attachments.flags.request_status == '+' else ('-' if r.attachments.flags.request_status == '-' else '?')}
            ],
            "from":
                versions,
            "where":
                {"and": [
                    {"terms": {"attachments.flags.request_type": TYPES}},
                    {"or": [
                        {"and": [# IF THE REQUESTEE SWITCHED THE ? FLAG, THEN IT IS DONE
                            {"term": {"attachments.flags.previous_status": "?"}},
                            {"not": {"term": {"attachments.flags.request_status": "?"}}},
                            {"equal": ["attachments.flags.modified_ts", "modified_ts"]}
                        ]},
                        {"and": [# IF OBSOLETED THE ATTACHMENT, IT IS DONE
                            {"term": {"attachments.isobsolete": 1}},
                            {"term": {"previous_values.isobsolete_value": 0}}
                        ]},
                        {"and": [# SOME BUGS ARE CLOSED WITHOUT REMOVING REVIEW
                            {"terms": {"bug_status": ["resolved", "verified", "closed"]}},
                            {"not": {"terms": {"previous_values.bug_status_value": ["resolved", "verified", "closed"]}}}
                        ]}
                    ]}
                ]}
        })

        # SOME ATTACHMENTS GO MISSING, CLOSE THEM TOO
        closed_bugs = {b.bug_id: b for b in qb.filter(versions, {"and": [# SOME BUGS ARE CLOSED WITHOUT REMOVING REVIEW
            {"terms": {"bug_status": ["resolved", "verified", "closed"]}},
            {"range": {"expires_on": {"gte": Date.now().milli}}}
        ]})}

        for s in starts:
            if s.bug_id in closed_bugs:
                e = closed_bugs[s.bug_id]
                ends.append({
                    "bug_id": e.bug_id,
                    "bug_status": e.bug_status,
                    "attach_id": s.attach_id,
                    "modified_ts": e.modified_ts,
                    "reviewer": s.reviewer,
                    "request_type": s.request_type,
                    "modified_by": e.modified_by,
                    "product": e.product,
                    "component": e.component,
                    "review_end_reason": 'closed',
                    "review_result": '?'
                })

        # REVIEWS END WHEN REASSIGNED TO SOMEONE ELSE
        changes = qb.run({
            "select": [
                "bug_id",
                {"name": "attach_id", "value": "changes.attach_id"},
                "modified_ts",
                {"name": "reviewer", "value": lambda r: r.changes.old_value.split("?")[1]},
                {"name": "request_type", "value": lambda r: r.changes.old_value.split("?")[0]},
                {"name": "modified_by", "value": "null"},
                "product",
                "component",
                {"name": "review_end_reason", "value": "'reassigned'"}
            ],
            "from":
                versions,
            "where":
                {"and": [# ONLY LOOK FOR NAME CHANGES IN THE "review?" FIELD
                    {"term": {"changes.field_name": "flags"}},
                    {"or": [{"prefix": {"changes.old_value": t + "?"}} for t in TYPES]}
                ]}
        })

        ends.extend(changes)

    # PYTHON VERSION NOT CAPABLE OF THIS JOIN, YET
    # reviews = qb.run({
    #     "from":
    #         starts,
    #     "select": [
    #         {"name": "bug_status", "value": "bug_status", "aggregate": "one"},
    #         {"name": "review_time", "value": "doneReview.modified_ts", "aggregate": "minimum"},
    #         {"name": "review_result", "value": "doneReview.review_result", "aggregate": "minimum"},
    #         {"name": "product", "value": "coalesce(doneReview.product, product)", "aggregate": "minimum"},
    #         {"name": "component", "value": "coalesce(doneReview.component, component)", "aggregate": "minimum"},
    #         # {"name": "keywords", "value": "(coalesce(keywords, '')+' '+ETL.parseWhiteBoard(whiteboard)).trim()+' '+flags", "aggregate": "one"},
    #         {"name": "requester_review_num", "value": "-1", "aggregate": "one"}
    #     ],
    #     "analytic": [
    #         {"name": "is_first", "value": "rownum==0 ? 1 : 0", "sort": "request_time", "edges": ["bug_id"]}
    #     ],
    #     "edges": [
    #         "bug_id",
    #         "attach_id",
    #         {"name": "reviewer", "value": "requestee"},
    #         {"name": "requester", "value": "created_by"},
    #         {"name": "request_time", "value": "modified_ts"},
    #         {
    #             "name": "doneReview",
    #             "test":
    #                 "bug_id==doneReview.bug_id && " +
    #                 "attach_id==doneReview.attach_id && " +
    #                 "requestee==doneReview.requestee && " +
    #                 "!(bug_status=='closed' && doneReview.review_end_reason=='closed') && " +
    #                 "modified_ts<=doneReview.modified_ts",
    #             "allowNulls": True,
    #             "domain": {"type": "set", "key":["bug_id", "attach_id", "requestee", "modified_ts"], "partitions": ends}
    #         }
    #     ]
    # })

    with Timer("match starts and ends for block {{start}}", {"start":min(*bugs)}):
        reviews = []
        ends = Index(data=ends, keys=["bug_id", "attach_id", "request_type", "reviewer"])

        for g, s in qb.groupby(starts, ["bug_id", "attach_id", "request_type", "reviewer"]):
            start_candidates = qb.sort(s, {"value": "request_time", "sort": 1})
            end_candidates = qb.sort(ends[g], {"value": "modified_ts", "sort": 1})

            # ZIP, BUT WITH ADDED CONSTRAINT s.modified_ts <= e.modified_ts
            if len(start_candidates) > 1:
                Log.note("many reviews on one attachment")
            ei = 0
            for i, s in enumerate(start_candidates):
                while ei < len(end_candidates) and end_candidates[ei].modified_ts < coalesce(s.request_time, convert.datetime2milli(Date.MAX)):
                    ei += 1
                e = end_candidates[ei]

                s.review_time = e.modified_ts
                s.review_duration = e.modified_ts - s.request_time
                s.review_result = e.review_result
                s.review_end_reason = e.review_end_reason
                s.product = coalesce(e.product, s.product)
                s.component = coalesce(e.component, s.component)
                s.requester_review_num = -1
                ei += 1

                if s.bug_status == 'closed' and e.review_end_reason == 'closed':
                    # REVIEWS ON CLOSED BUGS ARE IGNORED
                    continue
                reviews.append(s)

        qb.run({
            "from": reviews,
            "window": [{
                "name": "is_first",
                "value": "rownum == 0",
                "edges": ["bug_id"],
                "sort": ["request_time"],
                "aggregate": "none"
            }]
        })

    with Timer("add {{num}} reviews to ES for block {{start}}", {"start": min(*bugs), "num": len(reviews)}):
        sink.extend({"json": convert.value2json(r)} for r in reviews)
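The start/end matching in this last example is a "zip with constraint": each sorted review request is paired with the earliest remaining end event that does not precede it. A distilled sketch of that idea over plain timestamps (hypothetical inputs, not the ETL's records):

def match_starts_to_ends(start_times, end_times):
    # both lists sorted ascending; pair each start with the earliest unused
    # end at or after it, mirroring the ei cursor in full_etl above
    pairs = []
    ei = 0
    for s in start_times:
        while ei < len(end_times) and end_times[ei] < s:
            ei += 1
        if ei == len(end_times):
            break  # no end remains for this or any later start
        pairs.append((s, end_times[ei]))
        ei += 1
    return pairs

assert match_starts_to_ends([1, 5, 9], [2, 3, 10]) == [(1, 2), (5, 10)]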