def old2new(bug, max_date):
    """
    CONVERT THE OLD ES FORMAT TO THE NEW
    THESE ARE KNOWN CHANGES THAT SHOULD BE MADE TO THE PRODUCTION VERSION

    bug - OLD-FORMAT BUG RECORD; ONLY ITS everconfirmed IS TOUCHED IN PLACE,
          EVERY LATER CHANGE IS MADE TO A COPY BUILT BY A JSON ROUND-TRIP
    max_date - expires_on VALUES GREATER THAN THIS BECOME parse_bug_history.MAX_TIME

    RETURNS THE CONVERTED COPY (THE NAME bug IS REBOUND BELOW, SO THE CALLER
    CAN ONLY SEE THE CONVERSION THROUGH THE RETURN VALUE)
    """
    # NOTE(review): == / != None ARE KEPT (NOT is / is not) BECAUSE PROJECT
    # OBJECTS MAY USE A NULL PLACEHOLDER THAT ONLY COMPARES EQUAL TO None - CONFIRM
    if bug.everconfirmed != None:
        if bug.everconfirmed == "":
            bug.everconfirmed = None
        else:
            bug.everconfirmed = int(bug.everconfirmed)

    # STRIP THE TRAILING SPACE FROM THIS ONE VALUE, WHEREVER IT APPEARS IN THE
    # SERIALIZED RECORD; bug REFERS TO A NEW OBJECT FROM HERE ON
    bug = CNV.JSON2object(
        CNV.object2JSON(bug).replace("bugzilla: other b.m.o issues ", "bugzilla: other b.m.o issues")
    )

    if bug.expires_on > max_date:
        bug.expires_on = parse_bug_history.MAX_TIME

    if bug.votes != None:
        bug.votes = int(bug.votes)
    bug.dupe_by = CNV.value2intlist(bug.dupe_by)
    if bug.votes == 0:
        del bug["votes"]

    # A cf_due_date THAT IS NOT ALREADY A NUMBER IS PARSED AS A YYYY-MM-DD STRING
    if bug.cf_due_date != None and not Math.is_number(bug.cf_due_date):
        bug.cf_due_date = CNV.datetime2milli(
            CNV.string2datetime(bug.cf_due_date, "%Y-%m-%d")
        )

    # SORT THE CHANGES BY field_name AND RENAME THE OLD KEYS;
    # "field_value_removed" MUST BE REPLACED BEFORE "field_value"
    sorted_changes = CNV.object2JSON(Q.sort(bug.changes, "field_name"))
    bug.changes = CNV.JSON2object(
        sorted_changes
        .replace("\"field_value_removed\":", "\"old_value\":")
        .replace("\"field_value\":", "\"new_value\":")
    )

    if bug.everconfirmed == 0:
        del bug["everconfirmed"]

    # HARD-CODED OVERRIDE FOR ONE SPECIFIC RECORD (REASON NOT VISIBLE HERE)
    if bug.id == "692436_1336314345":
        bug.votes = 3

    # BEST EFFORT: WHEN cf_last_resolved CAN NOT BE CONVERTED IT IS LEFT AS IT WAS
    try:
        if Math.is_number(bug.cf_last_resolved):
            bug.cf_last_resolved = long(bug.cf_last_resolved)
        else:
            bug.cf_last_resolved = CNV.datetime2milli(
                CNV.string2datetime(bug.cf_last_resolved, "%Y-%m-%d %H:%M:%S")
            )
    except Exception:
        pass

    return bug
def old2new(bug):
    """
    APPLY THE KNOWN CHANGES THAT SHOULD BE MADE TO THE PRODUCTION VERSION

    bug IS MODIFIED IN PLACE; NOTHING IS RETURNED
    """
    # id IS _id WITH DOTS TURNED INTO UNDERSCORES AND THE LAST THREE CHARACTERS DROPPED
    old_id = bug._id
    bug.id = old_id.replace(".", "_")[:-3]
    bug._id = None

    # BOTH FIELDS ARE STORED AS INTEGERS WHEN PRESENT
    for name in ("everconfirmed", "votes"):
        value = getattr(bug, name)
        if value is not None:
            setattr(bug, name, int(value))

    bug.dupe_by = CNV.value2intlist(bug.dupe_by)

    if bug.votes == 0:
        del bug["votes"]

    remaining = bug.remaining_time
    if Math.is_integer(remaining) and int(remaining) == 0:
        del bug["remaining_time"]

    due_date = bug.cf_due_date
    if due_date is not None:
        bug.cf_due_date = CNV.datetime2milli(CNV.string2datetime(due_date, "%Y-%m-%d"))

    if bug.everconfirmed == 0:
        del bug["everconfirmed"]

    # BEST EFFORT: cf_last_resolved IS LEFT AS IT WAS WHEN IT CAN NOT BE CONVERTED
    try:
        resolved = CNV.string2datetime(bug.cf_last_resolved, "%Y-%m-%d %H:%M:%S")
        bug.cf_last_resolved = CNV.datetime2milli(resolved)
    except Exception:
        pass
def old2new(bug):
    """
    THESE ARE KNOWN CHANGES THAT SHOULD BE MADE TO THE PRODUCTION VERSION

    THE GIVEN bug IS UPDATED IN PLACE AND THE FUNCTION RETURNS NOTHING
    """
    day_format = "%Y-%m-%d"
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    # BUILD id FROM _id (DOTS -> UNDERSCORES, MINUS THE LAST THREE CHARACTERS), THEN CLEAR _id
    underscored = bug._id.replace(".", "_")
    bug.id = underscored[:-3]
    bug._id = None

    confirmed = bug.everconfirmed
    if confirmed is not None:
        bug.everconfirmed = int(confirmed)

    vote_count = bug.votes
    if vote_count is not None:
        bug.votes = int(vote_count)

    bug.dupe_by = CNV.value2intlist(bug.dupe_by)

    # ZERO-VALUED FIELDS ARE REMOVED RATHER THAN STORED
    if bug.votes == 0:
        del bug["votes"]
    if Math.is_integer(bug.remaining_time) and int(bug.remaining_time) == 0:
        del bug["remaining_time"]

    if bug.cf_due_date is not None:
        parsed_due = CNV.string2datetime(bug.cf_due_date, day_format)
        bug.cf_due_date = CNV.datetime2milli(parsed_due)

    if bug.everconfirmed == 0:
        del bug["everconfirmed"]

    # ANY FAILURE TO CONVERT cf_last_resolved IS IGNORED ON PURPOSE
    try:
        bug.cf_last_resolved = CNV.datetime2milli(
            CNV.string2datetime(bug.cf_last_resolved, timestamp_format)
        )
    except Exception:
        pass
def milli2datetime(r):
    """
    CONVERT ANY longs INTO TIME STRINGS

    WALKS r RECURSIVELY:
      * None AND STRINGS ARE RETURNED UNCHANGED
      * NUMBERS ABOVE 800000000000 ARE FORMATTED AS "%Y-%m-%d %H:%M:%S",
        OTHER NUMBERS ARE RETURNED UNCHANGED
      * dicts GET LOWER-CASED KEYS, ENTRIES THAT CONVERT TO None ARE DROPPED
      * OTHER ITERABLES BECOME A LIST WITHOUT None ITEMS (None WHEN NOTHING
        IS LEFT), SORTED WITH Q.sort WHEN THAT IS POSSIBLE
      * ANYTHING ELSE IS RETURNED UNCHANGED

    ANY FAILURE IS REPORTED WITH Log.warning AND NO VALUE IS RETURNED
    """
    try:
        if r == None:
            return None
        if isinstance(r, basestring):
            return r

        if Math.is_number(r):
            if CNV.value2number(r) > 800000000000:
                return CNV.datetime2string(CNV.milli2datetime(r), "%Y-%m-%d %H:%M:%S")
            return r

        if isinstance(r, dict):
            scrubbed = {}
            for key, value in r.items():
                converted = milli2datetime(value)
                if converted != None:
                    scrubbed[key.lower()] = converted
            return scrubbed

        if not hasattr(r, '__iter__'):
            return r

        kept = [c for c in (milli2datetime(item) for item in r) if c != None]
        if not kept:
            return None
        # ITEMS THAT CAN NOT BE SORTED ARE RETURNED IN THEIR ORIGINAL ORDER
        try:
            return Q.sort(kept)
        except Exception:
            return kept
    except Exception as e:
        Log.warning("Can not scrub: {{json}}", {"json": r}, e)
def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue):
    """
    RUN THE ETL OVER THE WHOLE RANGE OF BUG IDS, ONE BLOCK OF
    settings.param.increment IDS AT A TIME

    resume_from_last_run - WHEN TRUE (AND settings.param.start IS NOT SET) START
                           AT get_max_bug_id(es) FLOORED TO THE INCREMENT
    settings - READS settings.param (start, end, increment) AND settings.args.quick
    param - READS allow_private_bugs AND start_time_str; bug_list IS SET ON IT
            AND A COPY IS HANDED TO run_both_etl FOR EVERY BLOCK
    db - QUERIED FOR THE HIGHEST BUG ID AND FOR THE CHANGED BUGS OF EACH BLOCK
    es, es_comments, output_queue - es IS ONLY USED TO FIND WHERE TO RESUME,
            THE OTHER TWO ARE PASSED THROUGH TO run_both_etl

    alias_analysis.main IS STARTED THROUGH Thread.run AROUND THE WHOLE LOOP
    """
    # WITHOUT THE GROUP FILTER: USED WHEN PRIVATE BUGS ARE ALLOWED
    all_bugs_sql = """ SELECT b.bug_id FROM bugs b WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) """
    # ONLY BUGS WITH NO ROW IN bug_group_map
    ungrouped_bugs_sql = """ SELECT b.bug_id FROM bugs b LEFT JOIN bug_group_map m ON m.bug_id=b.bug_id WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND m.bug_id IS NULL """

    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
        end = nvl(
            settings.param.end,
            db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id
        )
        if resume_from_last_run:
            start = nvl(settings.param.start, Math.floor(get_max_bug_id(es), settings.param.increment))
        else:
            start = nvl(settings.param.start, 0)

        #############################################################
        ## MAIN ETL LOOP (BLOCKS ARE PROCESSED ONE AFTER THE OTHER)
        #############################################################
        for lo, hi in Q.intervals(start, end, settings.param.increment):
            #--quick ONLY DOES FIRST AND LAST BLOCKS
            if settings.args.quick and lo != 0 and lo < end - settings.param.increment:
                continue

            try:
                #GET LIST OF CHANGED BUGS
                with Timer("time to get {{min}}..{{max}} bug list", {"min": lo, "max": hi}):
                    if param.allow_private_bugs:
                        sql = all_bugs_sql
                    else:
                        sql = ungrouped_bugs_sql
                    rows = db.query(sql, {
                        "min": lo,
                        "max": hi,
                        "start_time_str": param.start_time_str
                    })
                    bug_list = Q.select(rows, u"bug_id")

                if not bug_list:
                    continue

                param.bug_list = bug_list
                run_both_etl(
                    db=db,
                    output_queue=output_queue,
                    es_comments=es_comments,
                    param=param.copy()
                )
            except Exception as e:
                Log.error("Problem with dispatch loop in range [{{min}}, {{max}})", {"min": lo, "max": hi}, e)
def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue):
    """
    PROCESS EVERY BLOCK OF BUG IDS BETWEEN start AND end THROUGH run_both_etl

    start IS settings.param.start WHEN GIVEN, OTHERWISE 0, OR (WHEN
    resume_from_last_run) get_max_bug_id(es) FLOORED TO settings.param.increment
    end IS settings.param.end WHEN GIVEN, OTHERWISE max(bug_id)+1 FROM THE bugs TABLE

    FOR EACH BLOCK THE BUGS WITH delta_ts >= param.start_time_str ARE LOOKED UP
    (SKIPPING BUGS LISTED IN bug_group_map UNLESS param.allow_private_bugs),
    STORED ON param.bug_list, AND A COPY OF param IS PASSED TO run_both_etl
    TOGETHER WITH db, output_queue AND es_comments

    alias_analysis.main IS STARTED THROUGH Thread.run AROUND THE WHOLE LOOP
    """
    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
        end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)

        fallback_start = 0
        if resume_from_last_run:
            fallback_start = Math.floor(get_max_bug_id(es), settings.param.increment)
        start = nvl(settings.param.start, fallback_start)

        #############################################################
        ## MAIN ETL LOOP
        #############################################################
        # BLOCKS ARE HANDLED SEQUENTIALLY, NO WORKER POOL IS USED
        for block_start, block_end in Q.intervals(start, end, settings.param.increment):
            if settings.args.quick:
                #--quick ONLY DOES FIRST AND LAST BLOCKS
                is_middle_block = block_start < end - settings.param.increment and block_start != 0
                if is_middle_block:
                    continue

            block_range = {"min": block_start, "max": block_end}
            try:
                #GET LIST OF CHANGED BUGS
                with Timer("time to get {{min}}..{{max}} bug list", block_range):
                    if param.allow_private_bugs:
                        changed = db.query(
                            """ SELECT b.bug_id FROM bugs b WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) """,
                            {
                                "min": block_start,
                                "max": block_end,
                                "start_time_str": param.start_time_str
                            }
                        )
                    else:
                        # THE LEFT JOIN + IS NULL KEEPS ONLY BUGS WITH NO ROW IN bug_group_map
                        changed = db.query(
                            """ SELECT b.bug_id FROM bugs b LEFT JOIN bug_group_map m ON m.bug_id=b.bug_id WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND m.bug_id IS NULL """,
                            {
                                "min": block_start,
                                "max": block_end,
                                "start_time_str": param.start_time_str
                            }
                        )
                    bug_list = Q.select(changed, u"bug_id")

                if bug_list:
                    param.bug_list = bug_list
                    etl_args = {
                        "db": db,
                        "output_queue": output_queue,
                        "es_comments": es_comments,
                        "param": param.copy()
                    }
                    run_both_etl(**etl_args)
            except Exception as e:
                Log.error(
                    "Problem with dispatch loop in range [{{min}}, {{max}})",
                    {"min": block_start, "max": block_end},
                    e
                )