Ejemplo n.º 1
0
    def test_incremental_etl_catches_tracking_flags(self):
        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            # FLAGS ADDED TO BUG 813650 ON 18/12/2012 2:38:08 AM (PDT), SO START AT SOME LATER TIME
            param.start_time = CNV.datetime2milli(CNV.string2datetime("02/01/2013 10:09:15", "%d/%m/%Y %H:%M:%S"))
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([813650])
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, 813650)

            flags = ["cf_status_firefox18", "cf_status_firefox19", "cf_status_firefox_esr17", "cf_status_b2g18"]
            for v in versions:
                if v.modified_ts>param.start_time:
                    for f in flags:
                        if v[f] != "fixed":
                            Log.error("813650 should have {{flag}}=='fixed'", {"flag": f})
Ejemplo n.º 2
0
def main(settings, es=None, es_comments=None):
    if not settings.param.allow_private_bugs and es and not es_comments:
        Log.error("Must have ES for comments")

    resume_from_last_run = File(
        settings.param.first_run_time).exists and not File(
            settings.param.last_run_time).exists

    #MAKE HANDLES TO CONTAINERS
    try:
        with DB(settings.bugzilla, readonly=True) as db:
            current_run_time, es, es_comments, last_run_time = setup_es(
                settings, db, es, es_comments)

            with ThreadedQueue(es, size=500, silent=True) as output_queue:
                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                # DB WRITES ARE DELAYED, RESULTING IN UNORDERED bug_when IN bugs_activity (AS IS ASSUMED FOR bugs(delats_ts))
                # THIS JITTER IS USUALLY NO MORE THAN ONE SECOND, BUT WE WILL GO BACK 60sec, JUST IN CASE.
                # THERE ARE OCCASIONAL WRITES THAT ARE IN GMT, BUT SINCE THEY LOOK LIKE THE FUTURE, WE CAPTURE THEM
                param.start_time = last_run_time - nvl(
                    settings.param.look_back,
                    5 * 60 * 1000)  # 5 MINUTE LOOK_BACK
                param.start_time_str = extract_bugzilla.milli2string(
                    db, param.start_time)
                param.alias_file = settings.param.alias_file
                param.allow_private_bugs = settings.param.allow_private_bugs

                if last_run_time > 0:
                    with Timer("run incremental etl"):
                        incremental_etl(settings, param, db, es, es_comments,
                                        output_queue)
                else:
                    with Timer("run full etl"):
                        full_etl(resume_from_last_run, settings, param, db, es,
                                 es_comments, output_queue)

                output_queue.add(Thread.STOP)

        if settings.es.alias:
            es.delete_all_but(settings.es.alias, settings.es.index)
            es.add_alias(settings.es.alias)

        if settings.es_comments.alias:
            es.delete_all_but(settings.es_comments.alias,
                              settings.es_comments.index)
            es_comments.add_alias(settings.es_comments.alias)

        File(settings.param.last_run_time).write(
            unicode(CNV.datetime2milli(current_run_time)))
    except Exception, e:
        Log.error("Problem with main ETL loop", e)
Ejemplo n.º 3
0
    def test_incremental_has_correct_expires_on(self):
        # 813650, 726635 BOTH HAVE CHANGES IN 2013
        bugs = struct.wrap([813650, 726635])
        start_incremental=CNV.datetime2milli(CNV.string2datetime("2013-01-01", "%Y-%m-%d"))

        es = elasticsearch.make_test_instance("candidate", self.settings.candidate)
        with DB(self.settings.bugzilla) as db:
            #SETUP FIRST RUN PARAMETERS
            param = Struct()
            param.end_time = start_incremental
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            #SETUP INCREMENTAL RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(datetime.utcnow())
            param.start_time = start_incremental
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

        for b in bugs:
            results = es.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and":[
                        {"term":{"bug_id":b}},
                        {"range":{"expires_on":{"gte":CNV.datetime2milli(datetime.utcnow())}}}
                    ]}
                }},
                "from": 0,
                "size": 200000,
                "sort": [],
                "fields": ["bug_id"]
            })

            if results.hits.total>1:
                Log.error("Expecting only one active bug_version record")
def replicate(source, destination, pending, last_updated):
    """
    COPY source RECORDS TO destination
    """
    for g, bugs in Q.groupby(pending, max_size=BATCH_SIZE):
        with Timer("Replicate {{num_bugs}} bug versions", {"num_bugs": len(bugs)}):
            data = source.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"terms": {"bug_id": set(bugs)}},
                        {"range": {"modified_ts":
                            {"gte": CNV.datetime2milli(last_updated)}
                        }}
                    ]}
                }},
                "from": 0,
                "size": 200000,
                "sort": []
            })

            d2 = map(
                lambda(x): {"id": x.id, "value": x},
                map(
                    lambda(x): transform_bugzilla.normalize(transform_bugzilla.rename_attachments(x._source), old_school=True),
                    data.hits.hits
                )
            )
            destination.extend(d2)
def get_pending(es, since):
    result = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {
            "range": {"modified_ts": {"gte": CNV.datetime2milli(since)}}}
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"default": {"terms": {"field": "bug_id", "size": 200000}}}
    })

    if len(result.facets.default.terms) >= 200000:
        Log.error("Can not handle more than 200K bugs changed")

    pending_bugs = Multiset(
        result.facets.default.terms,
        key_field="term",
        count_field="count"
    )
    Log.note("Source has {{num}} bug versions for updating", {
        "num": len(pending_bugs)
    })
    return pending_bugs
Ejemplo n.º 6
0
def main(settings):

    #MAKE HANDLES TO CONTAINERS
    with DB(settings.bugzilla) as db:
        #REAL ES
        #        if settings.candidate.alias is None:
        #            settings.candidate.alias=settings.candidate.index
        #            settings.candidate.index=settings.candidate.alias+CNV.datetime2string(datetime.utcnow(), "%Y%m%d_%H%M%S")
        #        candidate=ElasticSearch.create_index(settings.candidate, File(settings.candidate.schema_file).read())
        candidate = Fake_ES(settings.fake_es)

        reference = ElasticSearch(settings.reference)

        #SETUP RUN PARAMETERS
        param = Struct()
        param.BUGS_TABLE_COLUMNS = get_bugs_table_columns(
            db, settings.bugzilla.schema)
        param.BUGS_TABLE_COLUMNS_SQL = SQL(",\n".join(
            ["`" + c.column_name + "`" for c in param.BUGS_TABLE_COLUMNS]))
        param.BUGS_TABLE_COLUMNS = Q.select(param.BUGS_TABLE_COLUMNS,
                                            "column_name")
        param.END_TIME = CNV.datetime2milli(datetime.utcnow())
        param.START_TIME = 0
        param.alias_file = settings.param.alias_file
        param.BUG_IDS_PARTITION = SQL("bug_id in {{bugs}}",
                                      {"bugs": db.quote(settings.param.bugs)})

        etl(db, candidate, param)

        #COMPARE ALL BUGS
        compare_both(candidate, reference, settings, settings.param.bugs)
Ejemplo n.º 7
0
    def test_ambiguous_whiteboard_screened(self):
        GOOD_BUG_TO_TEST=1046

        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

            #MARK BUG AS ONE OF THE SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST, SCREENED_WHITEBOARD_BUG_GROUPS[0])
            #MARK BUG AS ONE OF THE *NOT* SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST, "not screened")
            db.flush()

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([GOOD_BUG_TO_TEST]) # bug 1046 sees lots of whiteboard, and other field, changes
            param.allow_private_bugs = True

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, GOOD_BUG_TO_TEST)

            for v in versions:
                if v.status_whiteboard not in (None, "", "[screened]"):
                    Log.error("Expecting whiteboard to be screened")
Ejemplo n.º 8
0
def get_all_bug_versions(es, bug_id, max_time):

    data = es.search({
        "query": {
            "filtered": {
                "query": {
                    "match_all": {}
                },
                "filter": {
                    "and": [{
                        "term": {
                            "bug_id": bug_id
                        }
                    }, {
                        "range": {
                            "modified_ts": {
                                "lte": CNV.datetime2milli(max_time)
                            }
                        }
                    }]
                }
            }
        },
        "from": 0,
        "size": 200000,
        "sort": []
    })

    return Q.select(data.hits.hits, "_source")
Ejemplo n.º 9
0
def diff(db, table, old_record, new_record):
    """
    UPDATE bugs_activity WITH THE CHANGES IN RECORDS
    """
    now = milli2string(db, CNV.datetime2milli(get_current_time(db)))
    changed = set(old_record.keys()) ^ set(new_record.keys())
    changed |= set([k for k, v in old_record.items() if v != new_record[k]])

    if table != u"bugs":
        prefix = table + u"."
    else:
        prefix = u""

    for c in changed:
        fieldid=db.query("SELECT id FROM fielddefs WHERE name={{field_name}}", {"field_name": prefix + c})[0].id

        if fieldid == None:
            Log.error("Expecting a valid field name")

        activity = Struct(
            bug_id=old_record.bug_id,
            who=1,
            bug_when=now,
            fieldid=fieldid,
            removed=old_record[c],
            added=new_record[c],
            attach_id=old_record.attach_id,
            comment_id=old_record.comment_id
        )
        db.insert("bugs_activity", activity)

    db.execute("UPDATE bugs SET delta_ts={{now}} WHERE {{where}}", {
        "now":now,
        "where":esfilter2sqlwhere(db, {"term":{"bug_id":old_record.bug_id}})
    })
Ejemplo n.º 10
0
def main(settings):

    #MAKE HANDLES TO CONTAINERS
    with DB(settings.bugzilla) as db:
        #REAL ES
#        if settings.candidate.alias is None:
#            settings.candidate.alias=settings.candidate.index
#            settings.candidate.index=settings.candidate.alias+CNV.datetime2string(datetime.utcnow(), "%Y%m%d_%H%M%S")
#        candidate=ElasticSearch.create_index(settings.candidate, File(settings.candidate.schema_file).read())
        candidate=Fake_ES(settings.fake_es)

        reference=ElasticSearch(settings.reference)

        #SETUP RUN PARAMETERS
        param=Struct()
        param.BUGS_TABLE_COLUMNS=get_bugs_table_columns(db, settings.bugzilla.schema)
        param.BUGS_TABLE_COLUMNS_SQL=SQL(",\n".join(["`"+c.column_name+"`" for c in param.BUGS_TABLE_COLUMNS]))
        param.BUGS_TABLE_COLUMNS=Q.select(param.BUGS_TABLE_COLUMNS, "column_name")
        param.END_TIME=CNV.datetime2milli(datetime.utcnow())
        param.START_TIME=0
        param.alias_file=settings.param.alias_file
        param.BUG_IDS_PARTITION=SQL("bug_id in {{bugs}}", {"bugs":db.quote(settings.param.bugs)})

        etl(db, candidate, param)

        #COMPARE ALL BUGS
        compare_both(candidate, reference, settings, settings.param.bugs)
Ejemplo n.º 11
0
    def test_specific_bugs(self):
        """
        USE A MYSQL DATABASE TO FILL AN ES INSTANCE (USE Fake_ES() INSTANCES TO KEEP
        THIS TEST LOCAL) WITH VERSIONS OF BUGS FROM settings.param.bugs.  COMPARE
        THOSE VERSIONS TO A REFERENCE ES (ALSO CHECKED INTO REPOSITORY)
        """
        # settings.param.allow_private_bugs = True
        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance("candidate", self.settings.candidate)
            reference = elasticsearch.open_test_instance("reference", self.settings.private_bugs_reference)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = self.settings.param.bugs
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(candidate, size=1000) as output:
                etl(db, output, param, please_stop=None)

            #COMPARE ALL BUGS
            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            compare_both(candidate, reference, self.settings, self.settings.param.bugs)
    def test_specific_bugs(self):
        """
        USE A MYSQL DATABASE TO FILL AN ES INSTANCE (USE Fake_ES() INSTANCES TO KEEP
        THIS TEST LOCAL) WITH VERSIONS OF BUGS FROM settings.param.bugs.  COMPARE
        THOSE VERSIONS TO A REFERENCE ES (ALSO CHECKED INTO REPOSITORY)
        """
        # settings.param.allow_private_bugs = True
        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance(
                "candidate", self.settings.candidate)
            reference = elasticsearch.open_test_instance(
                "reference", self.settings.private_bugs_reference)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = self.settings.param.bugs
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(candidate, size=1000) as output:
                etl(db, output, param, please_stop=None)

            #COMPARE ALL BUGS
            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            compare_both(candidate, reference, self.settings,
                         self.settings.param.bugs)
    def test_whiteboard_screened(self):
        GOOD_BUG_TO_TEST = 1046

        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate",
                                                  self.settings.candidate)

            #MARK BUG AS ONE OF THE SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST,
                                   SCREENED_WHITEBOARD_BUG_GROUPS[0])
            db.flush()

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([
                GOOD_BUG_TO_TEST
            ])  # bug 1046 sees lots of whiteboard, and other field, changes
            param.allow_private_bugs = True

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, GOOD_BUG_TO_TEST)

            for v in versions:
                if v.status_whiteboard not in (None, "", "[screened]"):
                    Log.error("Expecting whiteboard to be screened")
Ejemplo n.º 14
0
def old2new(bug):
    #THESE ARE KNOWN CHANGES THAT SHOULD BE MADE TO THE PRODUCTION VERSION
    bug.id=bug._id.replace(".", "_")[:-3]
    bug._id=None

    if bug.everconfirmed is not None: bug.everconfirmed=int(bug.everconfirmed)
    if bug.votes is not None: bug.votes=int(bug.votes)
    bug.dupe_by=CNV.value2intlist(bug.dupe_by)
    if bug.votes==0: del bug["votes"]
    if Math.is_integer(bug.remaining_time) and int(bug.remaining_time)==0: del bug["remaining_time"]
    if bug.cf_due_date is not None: bug.cf_due_date=CNV.datetime2milli(CNV.string2datetime(bug.cf_due_date, "%Y-%m-%d"))
    if bug.everconfirmed==0: del bug["everconfirmed"]


    try:
        bug.cf_last_resolved=CNV.datetime2milli(CNV.string2datetime(bug.cf_last_resolved, "%Y-%m-%d %H:%M:%S"))
    except Exception, e:
        pass
    def random_sample_of_bugs(self):
        """
        I USE THIS TO FIND BUGS THAT CAUSE MY CODE PROBLEMS.  OF COURSE, IT ONLY WORKS
        WHEN I HAVE A REFERENCE TO COMPARE TO
        """
        NUM_TO_TEST = 100
        MAX_BUG_ID = 900000

        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance(
                "candidate", self.settings.candidate)
            reference = ElasticSearch(self.settings.private_bugs_reference)

            #GO FASTER BY STORING LOCAL FILE
            local_cache = File(self.settings.param.temp_dir +
                               "/private_bugs.json")
            if local_cache.exists:
                private_bugs = set(CNV.JSON2object(local_cache.read()))
            else:
                with Timer("get private bugs"):
                    private_bugs = compare_es.get_private_bugs(reference)
                    local_cache.write(CNV.object2JSON(private_bugs))

            while True:
                some_bugs = [
                    b for b in
                    [Random.int(MAX_BUG_ID) for i in range(NUM_TO_TEST)]
                    if b not in private_bugs
                ]

                Log.note("Test with the following bug_ids: {{bugs}}",
                         {"bugs": some_bugs})

                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                param.start_time = 0
                param.start_time_str = extract_bugzilla.milli2string(db, 0)
                param.alias_file = self.settings.param.alias_file

                try:
                    with ThreadedQueue(candidate, 100) as output:
                        etl(db, output, param, please_stop=None)

                    #COMPARE ALL BUGS
                    Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
                    found_errors = compare_both(candidate, reference,
                                                self.settings, some_bugs)
                    if found_errors:
                        Log.note("Errors found")
                        break
                    else:
                        pass
                except Exception, e:
                    Log.warning(
                        "Total failure during compare of bugs {{bugs}}",
                        {"bugs": some_bugs}, e)
Ejemplo n.º 16
0
def old2new(bug, max_date):
    """
    CONVERT THE OLD ES FORMAT TO THE NEW
    THESE ARE KNOWN CHANGES THAT SHOULD BE MADE TO THE PRODUCTION VERSION
    """
    if bug.everconfirmed != None:
        if bug.everconfirmed == "":
            bug.everconfirmed = None
        else:
            bug.everconfirmed = int(bug.everconfirmed)

    bug = CNV.JSON2object(CNV.object2JSON(bug).replace("bugzilla: other b.m.o issues ", "bugzilla: other b.m.o issues"))

    if bug.expires_on > max_date:
        bug.expires_on = parse_bug_history.MAX_TIME
    if bug.votes != None:
        bug.votes = int(bug.votes)
    bug.dupe_by = CNV.value2intlist(bug.dupe_by)
    if bug.votes == 0:
        del bug["votes"]
        # if Math.is_integer(bug.remaining_time) and int(bug.remaining_time) == 0:
    #     bug.remaining_time = 0
    if bug.cf_due_date != None and not Math.is_number(bug.cf_due_date):
        bug.cf_due_date = CNV.datetime2milli(
            CNV.string2datetime(bug.cf_due_date, "%Y-%m-%d")
        )
    bug.changes = CNV.JSON2object(
        CNV.object2JSON(Q.sort(bug.changes, "field_name")) \
            .replace("\"field_value_removed\":", "\"old_value\":") \
            .replace("\"field_value\":", "\"new_value\":")
    )

    if bug.everconfirmed == 0:
        del bug["everconfirmed"]
    if bug.id == "692436_1336314345":
        bug.votes = 3

    try:
        if Math.is_number(bug.cf_last_resolved):
            bug.cf_last_resolved = long(bug.cf_last_resolved)
        else:
            bug.cf_last_resolved = CNV.datetime2milli(CNV.string2datetime(bug.cf_last_resolved, "%Y-%m-%d %H:%M:%S"))
    except Exception, e:
        pass
Ejemplo n.º 17
0
def main(settings, es=None, es_comments=None):
    if not settings.param.allow_private_bugs and es and not es_comments:
        Log.error("Must have ES for comments")

    resume_from_last_run = File(settings.param.first_run_time).exists and not File(settings.param.last_run_time).exists

    #MAKE HANDLES TO CONTAINERS
    try:
        with DB(settings.bugzilla, readonly=True) as db:
            current_run_time, es, es_comments, last_run_time = setup_es(settings, db, es, es_comments)

            with ThreadedQueue(es, size=500, silent=True) as output_queue:
                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                # DB WRITES ARE DELAYED, RESULTING IN UNORDERED bug_when IN bugs_activity (AS IS ASSUMED FOR bugs(delats_ts))
                # THIS JITTER IS USUALLY NO MORE THAN ONE SECOND, BUT WE WILL GO BACK 60sec, JUST IN CASE.
                # THERE ARE OCCASIONAL WRITES THAT ARE IN GMT, BUT SINCE THEY LOOK LIKE THE FUTURE, WE CAPTURE THEM
                param.start_time = last_run_time - nvl(settings.param.look_back, 5 * 60 * 1000)  # 5 MINUTE LOOK_BACK
                param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)
                param.alias_file = settings.param.alias_file
                param.allow_private_bugs = settings.param.allow_private_bugs

                if last_run_time > 0:
                    with Timer("run incremental etl"):
                        incremental_etl(settings, param, db, es, es_comments, output_queue)
                else:
                    with Timer("run full etl"):
                        full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue)

                output_queue.add(Thread.STOP)

        if settings.es.alias:
            es.delete_all_but(settings.es.alias, settings.es.index)
            es.add_alias(settings.es.alias)

        if settings.es_comments.alias:
            es.delete_all_but(settings.es_comments.alias, settings.es_comments.index)
            es_comments.add_alias(settings.es_comments.alias)

        File(settings.param.last_run_time).write(unicode(CNV.datetime2milli(current_run_time)))
    except Exception, e:
        Log.error("Problem with main ETL loop", e)
Ejemplo n.º 18
0
def random_sample_of_bugs(settings):
    NUM_TO_TEST = 100
    MAX_BUG_ID = 900000

    with DB(settings.bugzilla) as db:
        candidate = Fake_ES(settings.fake_es)
        reference = ElasticSearch(settings.reference)

        #GO FASTER BY STORING LOCAL FILE
        local_cache = File(settings.param.temp_dir + "/private_bugs.json")
        if local_cache.exists:
            private_bugs = set(CNV.JSON2object(local_cache.read()))
        else:
            with Timer("get private bugs"):
                private_bugs = compare_es.get_private_bugs(reference)
                local_cache.write(CNV.object2JSON(private_bugs))

        while True:
            some_bugs = [
                b
                for b in [Random.int(MAX_BUG_ID) for i in range(NUM_TO_TEST)]
                if b not in private_bugs
            ]

            #SETUP RUN PARAMETERS
            param = Struct()
            param.BUGS_TABLE_COLUMNS = get_bugs_table_columns(
                db, settings.bugzilla.schema)
            param.BUGS_TABLE_COLUMNS_SQL = SQL(",\n".join(
                ["`" + c.column_name + "`" for c in param.BUGS_TABLE_COLUMNS]))
            param.BUGS_TABLE_COLUMNS = Q.select(param.BUGS_TABLE_COLUMNS,
                                                "column_name")
            param.END_TIME = CNV.datetime2milli(datetime.utcnow())
            param.START_TIME = 0
            param.alias_file = settings.param.alias_file
            param.BUG_IDS_PARTITION = SQL("bug_id in {{bugs}}",
                                          {"bugs": db.quote(some_bugs)})

            try:
                etl(db, candidate, param)

                #COMPARE ALL BUGS
                found_errors = compare_both(candidate, reference, settings,
                                            some_bugs)
                if found_errors:
                    D.println("Errors found")
                    break
                else:
                    pass
            except Exception, e:
                D.warning("Total faiure during compare of bugs {{bugs}}",
                          {"bugs": some_bugs}, e)
Ejemplo n.º 19
0
def old2new(bug):
    #THESE ARE KNOWN CHANGES THAT SHOULD BE MADE TO THE PRODUCTION VERSION
    bug.id = bug._id.replace(".", "_")[:-3]
    bug._id = None

    if bug.everconfirmed is not None:
        bug.everconfirmed = int(bug.everconfirmed)
    if bug.votes is not None: bug.votes = int(bug.votes)
    bug.dupe_by = CNV.value2intlist(bug.dupe_by)
    if bug.votes == 0: del bug["votes"]
    if Math.is_integer(bug.remaining_time) and int(bug.remaining_time) == 0:
        del bug["remaining_time"]
    if bug.cf_due_date is not None:
        bug.cf_due_date = CNV.datetime2milli(
            CNV.string2datetime(bug.cf_due_date, "%Y-%m-%d"))
    if bug.everconfirmed == 0: del bug["everconfirmed"]

    try:
        bug.cf_last_resolved = CNV.datetime2milli(
            CNV.string2datetime(bug.cf_last_resolved, "%Y-%m-%d %H:%M:%S"))
    except Exception, e:
        pass
    def test_incremental_etl_catches_tracking_flags(self):
        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate",
                                                  self.settings.candidate)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            # FLAGS ADDED TO BUG 813650 ON 18/12/2012 2:38:08 AM (PDT), SO START AT SOME LATER TIME
            param.start_time = CNV.datetime2milli(
                CNV.string2datetime("02/01/2013 10:09:15",
                                    "%d/%m/%Y %H:%M:%S"))
            param.start_time_str = extract_bugzilla.milli2string(
                db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([813650])
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, 813650)

            flags = [
                "cf_status_firefox18", "cf_status_firefox19",
                "cf_status_firefox_esr17", "cf_status_b2g18"
            ]
            for v in versions:
                if v.modified_ts > param.start_time:
                    for f in flags:
                        if v[f] != "fixed":
                            Log.error("813650 should have {{flag}}=='fixed'",
                                      {"flag": f})
Ejemplo n.º 21
0
def get_all_bug_versions(es, bug_id, max_time):

    data=es.search({
        "query":{"filtered":{
            "query":{"match_all":{}},
            "filter":{"and":[
                {"term":{"bug_id":bug_id}},
                {"range":{"modified_ts":{"lte":CNV.datetime2milli(max_time)}}}
            ]}
        }},
        "from":0,
        "size":200000,
        "sort":[]
    })

    return Q.select(data.hits.hits, "_source")
Ejemplo n.º 22
0
    def random_sample_of_bugs(self):
        """
        I USE THIS TO FIND BUGS THAT CAUSE MY CODE PROBLEMS.  OF COURSE, IT ONLY WORKS
        WHEN I HAVE A REFERENCE TO COMPARE TO
        """
        NUM_TO_TEST = 100
        MAX_BUG_ID = 900000

        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance("candidate", self.settings.candidate)
            reference = ElasticSearch(self.settings.private_bugs_reference)

            #GO FASTER BY STORING LOCAL FILE
            local_cache = File(self.settings.param.temp_dir + "/private_bugs.json")
            if local_cache.exists:
                private_bugs = set(CNV.JSON2object(local_cache.read()))
            else:
                with Timer("get private bugs"):
                    private_bugs = compare_es.get_private_bugs(reference)
                    local_cache.write(CNV.object2JSON(private_bugs))

            while True:
                some_bugs = [b for b in [Random.int(MAX_BUG_ID) for i in range(NUM_TO_TEST)] if b not in private_bugs]

                Log.note("Test with the following bug_ids: {{bugs}}", {"bugs":some_bugs})

                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                param.start_time = 0
                param.start_time_str = extract_bugzilla.milli2string(db, 0)
                param.alias_file = self.settings.param.alias_file

                try:
                    with ThreadedQueue(candidate, 100) as output:
                        etl(db, output, param, please_stop=None)

                    #COMPARE ALL BUGS
                    Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
                    found_errors = compare_both(candidate, reference, self.settings, some_bugs)
                    if found_errors:
                        Log.note("Errors found")
                        break
                    else:
                        pass
                except Exception, e:
                    Log.warning("Total failure during compare of bugs {{bugs}}", {"bugs": some_bugs}, e)
Ejemplo n.º 23
0
def random_sample_of_bugs(settings):
    NUM_TO_TEST=100
    MAX_BUG_ID=900000


    with DB(settings.bugzilla) as db:
        candidate=Fake_ES(settings.fake_es)
        reference=ElasticSearch(settings.reference)

        #GO FASTER BY STORING LOCAL FILE
        local_cache=File(settings.param.temp_dir+"/private_bugs.json")
        if local_cache.exists:
            private_bugs=set(CNV.JSON2object(local_cache.read()))
        else:
            with Timer("get private bugs"):
                private_bugs= compare_es.get_private_bugs(reference)
                local_cache.write(CNV.object2JSON(private_bugs))

        while True:
            some_bugs=[b for b in [Random.int(MAX_BUG_ID) for i in range(NUM_TO_TEST)] if b not in private_bugs]

            #SETUP RUN PARAMETERS
            param=Struct()
            param.BUGS_TABLE_COLUMNS=get_bugs_table_columns(db, settings.bugzilla.schema)
            param.BUGS_TABLE_COLUMNS_SQL=SQL(",\n".join(["`"+c.column_name+"`" for c in param.BUGS_TABLE_COLUMNS]))
            param.BUGS_TABLE_COLUMNS=Q.select(param.BUGS_TABLE_COLUMNS, "column_name")
            param.END_TIME=CNV.datetime2milli(datetime.utcnow())
            param.START_TIME=0
            param.alias_file=settings.param.alias_file
            param.BUG_IDS_PARTITION=SQL("bug_id in {{bugs}}", {"bugs":db.quote(some_bugs)})

            try:
                etl(db, candidate, param)

                #COMPARE ALL BUGS
                found_errors=compare_both(candidate, reference, settings, some_bugs)
                if found_errors:
                    D.println("Errors found")
                    break
                else:
                    pass
            except Exception, e:
                D.warning("Total faiure during compare of bugs {{bugs}}", {"bugs":some_bugs}, e)
Ejemplo n.º 24
0
    def test_specific_bugs(self):
        """
        USE A MYSQL DATABASE TO FILL AN ES INSTANCE (USE Fake_ES() INSTANCES TO KEEP
        THIS TEST LOCAL) WITH VERSIONS OF BUGS FROM settings.param.bugs.
        """
        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance("candidate", self.settings.elasticsearch)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = self.settings.param.bugs
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(candidate, size=1000) as output:
                etl(db, output, param, please_stop=None)
    def test_specific_bugs(self):
        """
        USE A MYSQL DATABASE TO FILL AN ES INSTANCE (USE Fake_ES() INSTANCES TO KEEP
        THIS TEST LOCAL) WITH VERSIONS OF BUGS FROM settings.param.bugs.
        """
        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance(
                "candidate", self.settings.elasticsearch)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = self.settings.param.bugs
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(candidate, size=1000) as output:
                etl(db, output, param, please_stop=None)
Ejemplo n.º 26
0
def diff(db, table, old_record, new_record):
    """
    UPDATE bugs_activity WITH THE CHANGES IN RECORDS
    """
    now = milli2string(db, CNV.datetime2milli(get_current_time(db)))
    changed = set(old_record.keys()) ^ set(new_record.keys())
    changed |= set([k for k, v in old_record.items() if v != new_record[k]])

    if table != u"bugs":
        prefix = table + u"."
    else:
        prefix = u""

    for c in changed:
        fieldid = db.query(
            "SELECT id FROM fielddefs WHERE name={{field_name}}",
            {"field_name": prefix + c})[0].id

        if fieldid == None:
            Log.error("Expecting a valid field name")

        activity = Struct(bug_id=old_record.bug_id,
                          who=1,
                          bug_when=now,
                          fieldid=fieldid,
                          removed=old_record[c],
                          added=new_record[c],
                          attach_id=old_record.attach_id,
                          comment_id=old_record.comment_id)
        db.insert("bugs_activity", activity)

    db.execute(
        "UPDATE bugs SET delta_ts={{now}} WHERE {{where}}", {
            "now": now,
            "where": esfilter2sqlwhere(db,
                                       {"term": {
                                           "bug_id": old_record.bug_id
                                       }})
        })
def get_last_updated(es):
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {
                    "range": {
                    "modified_ts": {"gte": CNV.datetime2milli(far_back)}}}
            }},
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {"0": {"statistical": {"field": "modified_ts"}}}
        })

        if results.facets["0"].count == 0:
            return datetime.min
        return CNV.milli2datetime(results.facets["0"].max)
    except Exception, e:
        Log.error("Can not get_last_updated from {{host}}/{{index}}",{
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
def main(settings):
    #USE A FILE
    if settings.source.filename != None:
        settings.destination.alias = settings.destination.index
        settings.destination.index = ElasticSearch.proto_name(settings.destination.alias)
        schema = CNV.JSON2object(File(settings.source.schema_filename).read())
        if transform_bugzilla.USE_ATTACHMENTS_DOT:
            schema = CNV.JSON2object(CNV.object2JSON(schema).replace("attachments_", "attachments."))

        dest = ElasticSearch.create_index(settings.destination, schema, limit_replicas=True)
        dest.set_refresh_interval(-1)
        extract_from_file(settings.source, dest)
        dest.set_refresh_interval(1)

        dest.delete_all_but(settings.destination.alias, settings.destination.index)
        dest.add_alias(settings.destination.alias)
        return

    # SYNCH WITH source ES INDEX
    source=ElasticSearch(settings.source)
    destination=get_or_create_index(settings["destination"], source)

    # GET LAST UPDATED
    time_file = File(settings.param.last_replication_time)
    from_file = None
    if time_file.exists:
        from_file = CNV.milli2datetime(CNV.value2int(time_file.read()))
    from_es = get_last_updated(destination)
    last_updated = nvl(MIN(from_file, from_es), CNV.milli2datetime(0))
    current_time = datetime.utcnow()

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED
    time_file.write(unicode(CNV.datetime2milli(current_time)))
Ejemplo n.º 29
0
                                            limit_replicas=True)

            # BUG COMMENTS
            comment_schema = File(settings.es_comments.schema_file).read()
            comment_schema = CNV.JSON2object(comment_schema, paths=True)
            comment_schema.settings = jsons.expand_dot(comment_schema.settings)
            if not settings.es_comments.alias:
                settings.es_comments.alias = settings.es_comments.index
                settings.es_comments.index = ElasticSearch.proto_name(
                    settings.es_comments.alias)
            es_comments = ElasticSearch.create_index(settings.es_comments,
                                                     comment_schema,
                                                     limit_replicas=True)

        File(settings.param.first_run_time).write(
            unicode(CNV.datetime2milli(current_run_time)))

    return current_run_time, es, es_comments, last_run_time


def incremental_etl(settings, param, db, es, es_comments, output_queue):
    ####################################################################
    ## ES TAKES TIME TO DELETE RECORDS, DO DELETE FIRST WITH HOPE THE
    ## INDEX GETS A REWRITE DURING ADD OF NEW RECORDS
    ####################################################################

    #REMOVE PRIVATE BUGS
    private_bugs = get_private_bugs_for_delete(db, param)
    Log.note(
        "Ensure the following private bugs are deleted:\n{{private_bugs|indent}}",
        {"private_bugs": private_bugs})
Ejemplo n.º 30
0
def normalize(bug, old_school=False):
    bug=bug.copy()
    bug.id = unicode(bug.bug_id) + "_" + unicode(bug.modified_ts)[:-3]
    bug._id = None

    #ENSURE STRUCTURES ARE SORTED
    # Do some processing to make sure that diffing between runs stays as similar as possible.
    bug.flags=Q.sort(bug.flags, "value")

    if bug.attachments:
        if USE_ATTACHMENTS_DOT:
            bug.attachments=CNV.JSON2object(CNV.object2JSON(bug.attachments).replace("attachments_", "attachments."))
        bug.attachments = Q.sort(bug.attachments, "attach_id")
        for a in bug.attachments:
            for k,v in list(a.items()):
                if k.startswith("attachments") and (k.endswith("isobsolete") or k.endswith("ispatch") or k.endswith("isprivate")):
                    new_v=CNV.value2int(v)
                    new_k=k[12:]
                    a[k.replace(".", "\.")]=new_v
                    if not old_school:
                        a[new_k]=new_v
            a.flags = Q.sort(a.flags, ["modified_ts", "value"])

    if bug.changes != None:
        if USE_ATTACHMENTS_DOT:
            json = CNV.object2JSON(bug.changes).replace("attachments_", "attachments.")
            bug.changes=CNV.JSON2object(json)
        bug.changes = Q.sort(bug.changes, ["attach_id", "field_name"])

    #bug IS CONVERTED TO A 'CLEAN' COPY
    bug = ElasticSearch.scrub(bug)
    # bug.attachments = nvl(bug.attachments, [])    # ATTACHMENTS MUST EXIST


    for f in NUMERIC_FIELDS:
        v = bug[f]
        if v == None:
            continue
        elif f in MULTI_FIELDS:
            bug[f] = CNV.value2intlist(v)
        elif CNV.value2number(v) == 0:
            del bug[f]
        else:
            bug[f]=CNV.value2number(v)

    # Also reformat some date fields
    for dateField in ["deadline", "cf_due_date", "cf_last_resolved"]:
        v = bug[dateField]
        if v == None: continue
        try:
            if isinstance(v, date):
                bug[dateField] = CNV.datetime2milli(v)
            elif isinstance(v, long) and len(unicode(v)) in [12, 13]:
                bug[dateField] = v
            elif not isinstance(v, basestring):
                Log.error("situation not handled")
            elif DATE_PATTERN_STRICT.match(v):
                # Convert to "2012/01/01 00:00:00.000"
                # Example: bug 856732 (cf_last_resolved)
                # dateString = v.substring(0, 10).replace("/", '-') + "T" + v.substring(11) + "Z"
                bug[dateField] = CNV.datetime2milli(CNV.string2datetime(v+"000", "%Y/%m/%d %H:%M%:S%f"))
            elif DATE_PATTERN_STRICT_SHORT.match(v):
                # Convert "2012/01/01 00:00:00" to "2012-01-01T00:00:00.000Z", then to a timestamp.
                # Example: bug 856732 (cf_last_resolved)
                # dateString = v.substring(0, 10).replace("/", '-') + "T" + v.substring(11) + "Z"
                bug[dateField] = CNV.datetime2milli(CNV.string2datetime(v.replace("-", "/"), "%Y/%m/%d %H:%M:%S"))
            elif DATE_PATTERN_RELAXED.match(v):
                # Convert "2012/01/01 00:00:00.000" to "2012-01-01"
                # Example: bug 643420 (deadline)
                #          bug 726635 (cf_due_date)
                bug[dateField] = CNV.datetime2milli(CNV.string2datetime(v[0:10], "%Y-%m-%d"))
        except Exception, e:
            Log.error("problem with converting date to milli (value={{value}})", {"value":bug[dateField]}, e)
from datetime import datetime
import unittest
from pymysql.times import TimeDelta
from bzETL.extract_bugzilla import SCREENED_WHITEBOARD_BUG_GROUPS
from bzETL.util.env import startup
from bzETL.util import struct
from bzETL.util.cnv import CNV
from bzETL.util.env.elasticsearch import ElasticSearch
from bzETL.util.env.emailer import Emailer
from bzETL.util.env.logs import Log
from bzETL.util.maths import Math
from bzETL.util.queries import Q
from bzETL.util.struct import nvl

NOW = CNV.datetime2milli(datetime.utcnow())
A_WHILE_AGO = int(NOW - TimeDelta(minutes=10).total_seconds() * 1000)


class TestLookForLeaks(unittest.TestCase):
    def setUp(self):
        settings = startup.read_settings(filename="leak_check_settings.json")
        Log.start(settings.debug)
        self.private = ElasticSearch(settings.private)
        self.public = ElasticSearch(settings.public)
        self.public_comments = ElasticSearch(settings.public_comments)
        self.settings = settings

    def tearDown(self):
        Log.stop()

    def blocks_of_bugs(self):
Ejemplo n.º 32
0
def normalize(bug, old_school=False):
    bug = bug.copy()
    bug.id = unicode(bug.bug_id) + "_" + unicode(bug.modified_ts)[:-3]
    bug._id = None

    #ENSURE STRUCTURES ARE SORTED
    # Do some processing to make sure that diffing between runs stays as similar as possible.
    bug.flags = Q.sort(bug.flags, "value")

    if bug.attachments:
        if USE_ATTACHMENTS_DOT:
            bug.attachments = CNV.JSON2object(
                CNV.object2JSON(bug.attachments).replace(
                    "attachments_", "attachments."))
        bug.attachments = Q.sort(bug.attachments, "attach_id")
        for a in bug.attachments:
            for k, v in list(a.items()):
                if k.startswith("attachments") and (k.endswith("isobsolete")
                                                    or k.endswith("ispatch") or
                                                    k.endswith("isprivate")):
                    new_v = CNV.value2int(v)
                    new_k = k[12:]
                    a[k.replace(".", "\.")] = new_v
                    if not old_school:
                        a[new_k] = new_v
            a.flags = Q.sort(a.flags, ["modified_ts", "value"])

    if bug.changes != None:
        if USE_ATTACHMENTS_DOT:
            json = CNV.object2JSON(bug.changes).replace(
                "attachments_", "attachments.")
            bug.changes = CNV.JSON2object(json)
        bug.changes = Q.sort(bug.changes, ["attach_id", "field_name"])

    #bug IS CONVERTED TO A 'CLEAN' COPY
    bug = ElasticSearch.scrub(bug)
    # bug.attachments = nvl(bug.attachments, [])    # ATTACHMENTS MUST EXIST

    for f in NUMERIC_FIELDS:
        v = bug[f]
        if v == None:
            continue
        elif f in MULTI_FIELDS:
            bug[f] = CNV.value2intlist(v)
        elif CNV.value2number(v) == 0:
            del bug[f]
        else:
            bug[f] = CNV.value2number(v)

    # Also reformat some date fields
    for dateField in ["deadline", "cf_due_date", "cf_last_resolved"]:
        v = bug[dateField]
        if v == None: continue
        try:
            if isinstance(v, date):
                bug[dateField] = CNV.datetime2milli(v)
            elif isinstance(v, long) and len(unicode(v)) in [12, 13]:
                bug[dateField] = v
            elif not isinstance(v, basestring):
                Log.error("situation not handled")
            elif DATE_PATTERN_STRICT.match(v):
                # Convert to "2012/01/01 00:00:00.000"
                # Example: bug 856732 (cf_last_resolved)
                # dateString = v.substring(0, 10).replace("/", '-') + "T" + v.substring(11) + "Z"
                bug[dateField] = CNV.datetime2milli(
                    CNV.string2datetime(v + "000", "%Y/%m/%d %H:%M%:S%f"))
            elif DATE_PATTERN_STRICT_SHORT.match(v):
                # Convert "2012/01/01 00:00:00" to "2012-01-01T00:00:00.000Z", then to a timestamp.
                # Example: bug 856732 (cf_last_resolved)
                # dateString = v.substring(0, 10).replace("/", '-') + "T" + v.substring(11) + "Z"
                bug[dateField] = CNV.datetime2milli(
                    CNV.string2datetime(v.replace("-", "/"),
                                        "%Y/%m/%d %H:%M:%S"))
            elif DATE_PATTERN_RELAXED.match(v):
                # Convert "2012/01/01 00:00:00.000" to "2012-01-01"
                # Example: bug 643420 (deadline)
                #          bug 726635 (cf_due_date)
                bug[dateField] = CNV.datetime2milli(
                    CNV.string2datetime(v[0:10], "%Y-%m-%d"))
        except Exception, e:
            Log.error(
                "problem with converting date to milli (value={{value}})",
                {"value": bug[dateField]}, e)
    def test_changes_to_private_bugs_still_have_bug_group(self):
        self.settings.param.allow_private_bugs = True
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()

        private_bugs = set(Random.sample(self.settings.param.bugs, 3))

        Log.note("The private bugs for this test are {{bugs}}",
                 {"bugs": private_bugs})

        database.make_test_instance(self.settings.bugzilla)

        #MARK SOME BUGS PRIVATE
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments",
                                                self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        # MAKE A CHANGE TO THE PRIVATE BUGS
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                old_bug = db.query(
                    "SELECT * FROM bugs WHERE bug_id={{bug_id}}",
                    {"bug_id": b})[0]
                new_bug = old_bug.copy()

                new_bug.bug_status = "NEW STATUS"
                diff(db, "bugs", old_bug, new_bug)

        #RUN INCREMENTAL
        bz_etl.main(self.settings, es, es_c)

        #VERIFY BUG GROUP STILL EXISTS
        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        now = datetime.utcnow()
        results = es.search({
            "query": {
                "filtered": {
                    "query": {
                        "match_all": {}
                    },
                    "filter": {
                        "and": [{
                            "terms": {
                                "bug_id": private_bugs
                            }
                        }, {
                            "range": {
                                "expires_on": {
                                    "gte": CNV.datetime2milli(now)
                                }
                            }
                        }]
                    }
                }
            },
            "from": 0,
            "size": 200000,
            "sort": []
        })
        latest_bugs = Q.select(results.hits.hits, "_source")
        latest_bugs_index = Q.unique_index(
            latest_bugs, "bug_id")  # IF NOT UNIQUE, THEN ETL IS WRONG

        for bug_id in private_bugs:
            if latest_bugs_index[bug_id] == None:
                Log.error("Expecting to find the private bug {{bug_id}}",
                          {"bug_id": bug_id})

            bug_group = latest_bugs_index[bug_id].bug_group
            if not bug_group:
                Log.error(
                    "Expecting private bug ({{bug_id}}) to have a bug group",
                    {"bug_id": bug_id})
            if BUG_GROUP_FOR_TESTING not in bug_group:
                Log.error(
                    "Expecting private bug ({{bug_id}}) to have a \"{{bug_group}}\" bug group",
                    {
                        "bug_id": bug_id,
                        "bug_group": BUG_GROUP_FOR_TESTING
                    })
    def test_incremental_has_correct_expires_on(self):
        # 813650, 726635 BOTH HAVE CHANGES IN 2013
        bugs = struct.wrap([813650, 726635])
        start_incremental = CNV.datetime2milli(
            CNV.string2datetime("2013-01-01", "%Y-%m-%d"))

        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.candidate)
        with DB(self.settings.bugzilla) as db:
            #SETUP FIRST RUN PARAMETERS
            param = Struct()
            param.end_time = start_incremental
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(
                db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            #SETUP INCREMENTAL RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(datetime.utcnow())
            param.start_time = start_incremental
            param.start_time_str = extract_bugzilla.milli2string(
                db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

        for b in bugs:
            results = es.search({
                "query": {
                    "filtered": {
                        "query": {
                            "match_all": {}
                        },
                        "filter": {
                            "and": [{
                                "term": {
                                    "bug_id": b
                                }
                            }, {
                                "range": {
                                    "expires_on": {
                                        "gte":
                                        CNV.datetime2milli(datetime.utcnow())
                                    }
                                }
                            }]
                        }
                    }
                },
                "from": 0,
                "size": 200000,
                "sort": [],
                "fields": ["bug_id"]
            })

            if results.hits.total > 1:
                Log.error("Expecting only one active bug_version record")
Ejemplo n.º 35
0
    def test_changes_to_private_bugs_still_have_bug_group(self):
        self.settings.param.allow_private_bugs = True
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()

        private_bugs = set(Random.sample(self.settings.param.bugs, 3))

        Log.note("The private bugs for this test are {{bugs}}", {"bugs": private_bugs})

        database.make_test_instance(self.settings.bugzilla)

        #MARK SOME BUGS PRIVATE
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

        es = elasticsearch.make_test_instance("candidate", self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments", self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        # MAKE A CHANGE TO THE PRIVATE BUGS
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                old_bug = db.query("SELECT * FROM bugs WHERE bug_id={{bug_id}}", {"bug_id": b})[0]
                new_bug = old_bug.copy()

                new_bug.bug_status = "NEW STATUS"
                diff(db, "bugs", old_bug, new_bug)


        #RUN INCREMENTAL
        bz_etl.main(self.settings, es, es_c)

        #VERIFY BUG GROUP STILL EXISTS
        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        now = datetime.utcnow()
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    {"terms": {"bug_id": private_bugs}},
                    {"range": {"expires_on": {"gte": CNV.datetime2milli(now)}}}
                ]}
            }},
            "from": 0,
            "size": 200000,
            "sort": []
        })
        latest_bugs = Q.select(results.hits.hits, "_source")
        latest_bugs_index = Q.unique_index(latest_bugs, "bug_id")  # IF NOT UNIQUE, THEN ETL IS WRONG

        for bug_id in private_bugs:
            if latest_bugs_index[bug_id] == None:
                Log.error("Expecting to find the private bug {{bug_id}}", {"bug_id": bug_id})

            bug_group = latest_bugs_index[bug_id].bug_group
            if not bug_group:
                Log.error("Expecting private bug ({{bug_id}}) to have a bug group", {"bug_id": bug_id})
            if BUG_GROUP_FOR_TESTING not in bug_group:
                Log.error("Expecting private bug ({{bug_id}}) to have a \"{{bug_group}}\" bug group", {
                    "bug_id": bug_id,
                    "bug_group": BUG_GROUP_FOR_TESTING
                })
Ejemplo n.º 36
0
from datetime import datetime
import unittest
from pymysql.times import TimeDelta
from bzETL.extract_bugzilla import SCREENED_WHITEBOARD_BUG_GROUPS
from bzETL.util.env import startup
from bzETL.util import struct
from bzETL.util.cnv import CNV
from bzETL.util.env.elasticsearch import ElasticSearch
from bzETL.util.env.emailer import Emailer
from bzETL.util.env.logs import Log
from bzETL.util.maths import Math
from bzETL.util.queries import Q
from bzETL.util.struct import nvl


NOW = CNV.datetime2milli(datetime.utcnow())
A_WHILE_AGO = int(NOW - TimeDelta(minutes=10).total_seconds()*1000)


class TestLookForLeaks(unittest.TestCase):
    def setUp(self):
        settings = startup.read_settings(filename="leak_check_settings.json")
        Log.start(settings.debug)
        self.private = ElasticSearch(settings.private)
        self.public = ElasticSearch(settings.public)
        self.public_comments = ElasticSearch(settings.public_comments)
        self.settings = settings

    def tearDown(self):
        Log.stop()
Ejemplo n.º 37
0
            schema.settings=jsons.expand_dot(schema.settings)
            if not settings.es.alias:
                settings.es.alias = settings.es.index
                settings.es.index = ElasticSearch.proto_name(settings.es.alias)
            es = ElasticSearch.create_index(settings.es, schema, limit_replicas=True)

            # BUG COMMENTS
            comment_schema = File(settings.es_comments.schema_file).read()
            comment_schema=CNV.JSON2object(comment_schema, paths=True)
            comment_schema.settings=jsons.expand_dot(comment_schema.settings)
            if not settings.es_comments.alias:
                settings.es_comments.alias = settings.es_comments.index
                settings.es_comments.index = ElasticSearch.proto_name(settings.es_comments.alias)
            es_comments = ElasticSearch.create_index(settings.es_comments, comment_schema, limit_replicas=True)

        File(settings.param.first_run_time).write(unicode(CNV.datetime2milli(current_run_time)))

    return current_run_time, es, es_comments, last_run_time


def incremental_etl(settings, param, db, es, es_comments, output_queue):
    ####################################################################
    ## ES TAKES TIME TO DELETE RECORDS, DO DELETE FIRST WITH HOPE THE
    ## INDEX GETS A REWRITE DURING ADD OF NEW RECORDS
    ####################################################################

    #REMOVE PRIVATE BUGS
    private_bugs = get_private_bugs_for_delete(db, param)
    Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", {"private_bugs": private_bugs})
    for g, delete_bugs in Q.groupby(private_bugs, size=1000):
        still_existing = get_bug_ids(es, {"terms": {"bug_id": delete_bugs}})