def blocks_of_bugs(self):
    """Return bug-id intervals, largest first, covering every bug in the private index."""
    # A size=0 search with a statistical facet on bug_id returns the maximum
    # bug id without pulling back any documents.
    facet_query = {
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [{"match_all": {}}]}
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"0": {"statistical": {"field": "bug_id"}}}
    }
    max_bug_id = self.private.search(facet_query).facets["0"].max

    # Blocks are emitted highest-first so the most recent bugs are handled first.
    intervals = list(Q.intervals(0, max_bug_id, self.settings.param.increment))
    return reversed(intervals)
def blocks_of_bugs(self):
    """
    Find the highest bug_id in the private ES index and split [0, max) into
    increment-sized intervals, returned in reverse (newest bugs first).
    """
    # size=0 + statistical facet: we only want the facet's max, not documents.
    result = self.private.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [{"match_all": {}}]}
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"0": {"statistical": {"field": "bug_id"}}}
    })
    top = result.facets["0"].max
    return reversed(list(Q.intervals(0, top, self.settings.param.increment)))
def main(settings, bug_list=None, please_stop=None, restart=False):
    """
    Associate each person with the email addresses they have used over the
    lifetime of the Bugzilla data.  CC lists (and reviews) contain email
    addresses, and a person's address can change over time; 'person' is
    abstract, simply assigned a canonical email address to facilitate
    identification.
    """
    if settings.args.quick:
        Log.note("Alias analysis skipped (--quick was used)")
        return

    if not restart:
        loadAliases(settings)

    if bug_list:
        # Explicit bug list: single pass over just those bugs.
        with DB(settings.bugzilla, readonly=True) as db:
            aggregator(get_all_cc_changes(db, bug_list))
            analysis(settings, True, please_stop)
        return

    with DB(settings.bugzilla, readonly=True) as db:
        start = nvl(settings.param.start, 0)
        end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)

        # Perform analysis on blocks of bugs, in case we crash partway through
        for lo, hi in Q.intervals(start, end, settings.param.alias_increment):
            Log.note("Load range {{start}}-{{end}}", {"start": lo, "end": hi})
            changes = get_all_cc_changes(db, range(lo, hi))
            if please_stop:
                break
            aggregator(changes)
            # Only the final block triggers the full analysis/save step.
            analysis(settings, hi >= end, please_stop)
def main(settings, bug_list=None, please_stop=None, restart=False):
    """
    Build the person<->email-address association for the Bugzilla data.

    CC lists (and reviews) are email addresses belonging to people, and the
    address used by a person can change over time.  This associates each
    abstract 'person' with all addresses they used, identified by one
    canonical email address.
    """
    # --quick mode skips the (expensive) alias pass entirely.
    if settings.args.quick:
        Log.note("Alias analysis skipped (--quick was used)")
        return

    if not restart:
        loadAliases(settings)

    if bug_list:
        with DB(settings.bugzilla, readonly=True) as db:
            data = get_all_cc_changes(db, bug_list)
            aggregator(data)
            analysis(settings, True, please_stop)
        return

    with DB(settings.bugzilla, readonly=True) as db:
        start = nvl(settings.param.start, 0)
        end = nvl(
            settings.param.end,
            db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id
        )

        # Process the id space in blocks so a crash loses at most one block.
        for block_start, block_end in Q.intervals(start, end, settings.param.alias_increment):
            Log.note("Load range {{start}}-{{end}}", {"start": block_start, "end": block_end})
            data = get_all_cc_changes(db, range(block_start, block_end))
            if please_stop:
                break
            aggregator(data)
            analysis(settings, block_end >= end, please_stop)
def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue):
    """
    Main ETL loop: walk the bug-id space in increment-sized blocks, pull the
    list of changed bugs for each block from MySQL, and dispatch them to
    run_both_etl.  Alias analysis runs concurrently in a background thread.

    With --quick only the first and last blocks are processed.  When
    param.allow_private_bugs is False, bugs in any group (bug_group_map) are
    excluded.
    """
    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
        end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
        start = nvl(settings.param.start, 0)
        if resume_from_last_run:
            # Round the highest bug already in ES down to a block boundary so
            # the partially-loaded block is redone.
            start = nvl(settings.param.start, Math.floor(get_max_bug_id(es), settings.param.increment))

        #############################################################
        ## MAIN ETL LOOP
        #############################################################

        #TWO WORKERS IS MORE THAN ENOUGH FOR A SINGLE THREAD
        # with Multithread([run_both_etl, run_both_etl]) as workers:
        # Loop variables renamed (min_id/max_id) so the builtins min/max are
        # not shadowed inside this function.
        for min_id, max_id in Q.intervals(start, end, settings.param.increment):
            if settings.args.quick and min_id < end - settings.param.increment and min_id != 0:
                #--quick ONLY DOES FIRST AND LAST BLOCKS
                continue

            try:
                #GET LIST OF CHANGED BUGS
                with Timer("time to get {{min}}..{{max}} bug list", {"min": min_id, "max": max_id}):
                    if param.allow_private_bugs:
                        bug_list = Q.select(db.query("""
                            SELECT b.bug_id
                            FROM bugs b
                            WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}})
                        """, {
                            "min": min_id,
                            "max": max_id,
                            "start_time_str": param.start_time_str
                        }), u"bug_id")
                    else:
                        # LEFT JOIN + IS NULL: keep only bugs with no group
                        # membership (i.e. public bugs).
                        bug_list = Q.select(db.query("""
                            SELECT b.bug_id
                            FROM bugs b
                            LEFT JOIN bug_group_map m ON m.bug_id=b.bug_id
                            WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND m.bug_id IS NULL
                        """, {
                            "min": min_id,
                            "max": max_id,
                            "start_time_str": param.start_time_str
                        }), u"bug_id")

                if not bug_list:
                    continue

                param.bug_list = bug_list
                run_both_etl(**{
                    "db": db,
                    "output_queue": output_queue,
                    "es_comments": es_comments,
                    "param": param.copy()
                })
            # `as` form is valid on Python 2.6+ and required on Python 3
            except Exception as e:
                Log.error("Problem with dispatch loop in range [{{min}}, {{max}})", {
                    "min": min_id,
                    "max": max_id
                }, e)
def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue):
    """
    Drive the main ETL over the whole bug-id space in increment-sized blocks.

    For each block [min_id, max_id), query MySQL for bugs changed since
    param.start_time_str and hand them to run_both_etl; alias analysis runs
    in a parallel thread for the duration.  --quick restricts work to the
    first and last blocks; when private bugs are disallowed, grouped bugs
    are filtered out via bug_group_map.
    """
    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
        end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
        start = nvl(settings.param.start, 0)
        if resume_from_last_run:
            # Resume from a block boundary at/below the max bug already in ES,
            # so any half-loaded block is reprocessed.
            start = nvl(
                settings.param.start,
                Math.floor(get_max_bug_id(es), settings.param.increment))

        #############################################################
        ## MAIN ETL LOOP
        #############################################################

        #TWO WORKERS IS MORE THAN ENOUGH FOR A SINGLE THREAD
        # with Multithread([run_both_etl, run_both_etl]) as workers:
        # min_id/max_id avoid shadowing the min/max builtins
        for min_id, max_id in Q.intervals(start, end, settings.param.increment):
            if settings.args.quick and min_id < end - settings.param.increment and min_id != 0:
                #--quick ONLY DOES FIRST AND LAST BLOCKS
                continue

            try:
                #GET LIST OF CHANGED BUGS
                with Timer("time to get {{min}}..{{max}} bug list", {"min": min_id, "max": max_id}):
                    if param.allow_private_bugs:
                        bug_list = Q.select(
                            db.query("""
                                SELECT b.bug_id
                                FROM bugs b
                                WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}})
                            """, {
                                "min": min_id,
                                "max": max_id,
                                "start_time_str": param.start_time_str
                            }),
                            u"bug_id")
                    else:
                        # Public bugs only: anti-join against bug_group_map
                        bug_list = Q.select(
                            db.query("""
                                SELECT b.bug_id
                                FROM bugs b
                                LEFT JOIN bug_group_map m ON m.bug_id=b.bug_id
                                WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND m.bug_id IS NULL
                            """, {
                                "min": min_id,
                                "max": max_id,
                                "start_time_str": param.start_time_str
                            }),
                            u"bug_id")

                if not bug_list:
                    continue

                param.bug_list = bug_list
                run_both_etl(**{
                    "db": db,
                    "output_queue": output_queue,
                    "es_comments": es_comments,
                    "param": param.copy()
                })
            # Python 2.6+/3-compatible exception syntax (was `except Exception, e`)
            except Exception as e:
                Log.error(
                    "Problem with dispatch loop in range [{{min}}, {{max}})",
                    {"min": min_id, "max": max_id},
                    e)