def main(settings):
    file = File(settings.param.alias_file)
    aliases = CNV.JSON2object(file.read())

    for v in aliases.values():
        v.candidates = CNV.dict2Multiset(v.candidates)

    data = [
        {"lost": n, "found": d.canonical}
        for n, d in aliases.items()
        if d.canonical != None and n != d.canonical
    ]

    sorted = Q.sort(data, "found")
    for s in sorted:
        Log.note("{{found}} == {{lost}}", s)

    clean = {
        n: d.canonical
        for n, d in aliases.items()
        if d.canonical != None and n != d.canonical and n != ""
    }

    rev_clean = struct.inverse(clean)
    Log.note(CNV.object2JSON(rev_clean, pretty=True))

    for k, v in rev_clean.items():
        if len(v) > 3:
            Log.note(CNV.object2JSON({k: v}, pretty=True))
def get_private_bugs(es):
    """
    FIND THE BUGS WE DO NOT EXPECT TO BE FOUND IN PUBLIC
    """
    data = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [
                {"script": {"script": "true"}},
                {"and": [{"exists": {"field": "bug_group"}}]}
            ]}
        }},
        "from": 0,
        "size": 200000,
        "sort": [],
        "facets": {},
        "fields": ["bug_id", "blocked", "dependson", "dupe_of", "dupe_by"]
    })

    with Timer("aggregate es results on private bugs"):
        output = set([])
        for bug in data.hits.hits:
            output.add(bug.fields.bug_id)
            output |= set(nvl(CNV.value2intlist(bug.fields.blocked), []))
            output |= set(nvl(CNV.value2intlist(bug.fields.dependson), []))
            output |= set(nvl(CNV.value2intlist(bug.fields.dupe_of), []))
            output |= set(nvl(CNV.value2intlist(bug.fields.dupe_by), []))
        output.update([551988, 636964])  # KNOWN PRIVATE BUGS (set.add() ACCEPTS ONLY ONE VALUE)
        return output
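# A minimal plain-Python sketch of the aggregation loop above, with the es hit
# structures and nvl/CNV helpers replaced by dict/set operations; the sample
# hit in the final comment is hypothetical, not data from a real index:
def collect_private_ids(hits):
    output = set()
    for fields in hits:
        output.add(fields["bug_id"])
        # TREAT MISSING RELATION FIELDS AS EMPTY, AS nvl(CNV.value2intlist(...), []) DOES
        for key in ("blocked", "dependson", "dupe_of", "dupe_by"):
            output |= set(fields.get(key) or [])
    return output

# collect_private_ids([{"bug_id": 1, "blocked": [2, 3], "dupe_of": None}]) == {1, 2, 3}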
def test_incremental_etl_catches_tracking_flags(self):
    database.make_test_instance(self.settings.bugzilla)

    with DB(self.settings.bugzilla) as db:
        es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

        #SETUP RUN PARAMETERS
        param = Struct()
        param.end_time = CNV.datetime2milli(get_current_time(db))
        # FLAGS ADDED TO BUG 813650 ON 18/12/2012 2:38:08 AM (PDT), SO START AT SOME LATER TIME
        param.start_time = CNV.datetime2milli(CNV.string2datetime("02/01/2013 10:09:15", "%d/%m/%Y %H:%M:%S"))
        param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)
        param.alias_file = self.settings.param.alias_file
        param.bug_list = struct.wrap([813650])
        param.allow_private_bugs = self.settings.param.allow_private_bugs

        with ThreadedQueue(es, size=1000) as output:
            etl(db, output, param, please_stop=None)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        versions = get_all_bug_versions(es, 813650)

        flags = ["cf_status_firefox18", "cf_status_firefox19", "cf_status_firefox_esr17", "cf_status_b2g18"]
        for v in versions:
            if v.modified_ts > param.start_time:
                for f in flags:
                    if v[f] != "fixed":
                        Log.error("813650 should have {{flag}}=='fixed'", {"flag": f})
def rename_attachments(bug_version):
    if bug_version.attachments == None:
        return bug_version
    if not USE_ATTACHMENTS_DOT:
        bug_version.attachments = CNV.JSON2object(
            CNV.object2JSON(bug_version.attachments).replace("attachments.", "attachments_")
        )
    return bug_version
def random_sample_of_bugs(self):
    """
    I USE THIS TO FIND BUGS THAT CAUSE MY CODE PROBLEMS.  OF COURSE, IT ONLY
    WORKS WHEN I HAVE A REFERENCE TO COMPARE TO
    """
    NUM_TO_TEST = 100
    MAX_BUG_ID = 900000

    with DB(self.settings.bugzilla) as db:
        candidate = elasticsearch.make_test_instance("candidate", self.settings.candidate)
        reference = ElasticSearch(self.settings.private_bugs_reference)

        #GO FASTER BY STORING LOCAL FILE
        local_cache = File(self.settings.param.temp_dir + "/private_bugs.json")
        if local_cache.exists:
            private_bugs = set(CNV.JSON2object(local_cache.read()))
        else:
            with Timer("get private bugs"):
                private_bugs = compare_es.get_private_bugs(reference)
                local_cache.write(CNV.object2JSON(private_bugs))

        while True:
            some_bugs = [b for b in [Random.int(MAX_BUG_ID) for i in range(NUM_TO_TEST)] if b not in private_bugs]
            Log.note("Test with the following bug_ids: {{bugs}}", {"bugs": some_bugs})

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)
            param.alias_file = self.settings.param.alias_file

            try:
                with ThreadedQueue(candidate, 100) as output:
                    etl(db, output, param, please_stop=None)

                #COMPARE ALL BUGS
                Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
                found_errors = compare_both(candidate, reference, self.settings, some_bugs)
                if found_errors:
                    Log.note("Errors found")
                    break
                else:
                    pass
            except Exception, e:
                Log.warning("Total failure during compare of bugs {{bugs}}", {"bugs": some_bugs}, e)
def main(settings, es=None, es_comments=None):
    if not settings.param.allow_private_bugs and es and not es_comments:
        Log.error("Must have ES for comments")

    resume_from_last_run = File(settings.param.first_run_time).exists and not File(settings.param.last_run_time).exists

    #MAKE HANDLES TO CONTAINERS
    try:
        with DB(settings.bugzilla, readonly=True) as db:
            current_run_time, es, es_comments, last_run_time = setup_es(settings, db, es, es_comments)

            with ThreadedQueue(es, size=500, silent=True) as output_queue:
                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                # DB WRITES ARE DELAYED, RESULTING IN UNORDERED bug_when IN bugs_activity (AS IS ASSUMED FOR bugs(delta_ts))
                # THIS JITTER IS USUALLY NO MORE THAN ONE SECOND, BUT WE WILL GO BACK look_back MILLISECONDS (DEFAULT 5 MINUTES), JUST IN CASE.
                # THERE ARE OCCASIONAL WRITES THAT ARE IN GMT, BUT SINCE THEY LOOK LIKE THE FUTURE, WE CAPTURE THEM
                param.start_time = last_run_time - nvl(settings.param.look_back, 5 * 60 * 1000)  # 5 MINUTE LOOK_BACK
                param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)
                param.alias_file = settings.param.alias_file
                param.allow_private_bugs = settings.param.allow_private_bugs

                if last_run_time > 0:
                    with Timer("run incremental etl"):
                        incremental_etl(settings, param, db, es, es_comments, output_queue)
                else:
                    with Timer("run full etl"):
                        full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue)

                output_queue.add(Thread.STOP)

        if settings.es.alias:
            es.delete_all_but(settings.es.alias, settings.es.index)
            es.add_alias(settings.es.alias)

        if settings.es_comments.alias:
            es.delete_all_but(settings.es_comments.alias, settings.es_comments.index)
            es_comments.add_alias(settings.es_comments.alias)

        File(settings.param.last_run_time).write(unicode(CNV.datetime2milli(current_run_time)))
    except Exception, e:
        Log.error("Problem with main ETL loop", e)
def random_sample_of_bugs(settings):
    NUM_TO_TEST = 100
    MAX_BUG_ID = 900000

    with DB(settings.bugzilla) as db:
        candidate = Fake_ES(settings.fake_es)
        reference = ElasticSearch(settings.reference)

        #GO FASTER BY STORING LOCAL FILE
        local_cache = File(settings.param.temp_dir + "/private_bugs.json")
        if local_cache.exists:
            private_bugs = set(CNV.JSON2object(local_cache.read()))
        else:
            with Timer("get private bugs"):
                private_bugs = compare_es.get_private_bugs(reference)
                local_cache.write(CNV.object2JSON(private_bugs))

        while True:
            some_bugs = [b for b in [Random.int(MAX_BUG_ID) for i in range(NUM_TO_TEST)] if b not in private_bugs]

            #SETUP RUN PARAMETERS
            param = Struct()
            param.BUGS_TABLE_COLUMNS = get_bugs_table_columns(db, settings.bugzilla.schema)
            param.BUGS_TABLE_COLUMNS_SQL = SQL(",\n".join(["`" + c.column_name + "`" for c in param.BUGS_TABLE_COLUMNS]))
            param.BUGS_TABLE_COLUMNS = Q.select(param.BUGS_TABLE_COLUMNS, "column_name")
            param.END_TIME = CNV.datetime2milli(datetime.utcnow())
            param.START_TIME = 0
            param.alias_file = settings.param.alias_file
            param.BUG_IDS_PARTITION = SQL("bug_id in {{bugs}}", {"bugs": db.quote(some_bugs)})

            try:
                etl(db, candidate, param)

                #COMPARE ALL BUGS
                found_errors = compare_both(candidate, reference, settings, some_bugs)
                if found_errors:
                    D.println("Errors found")
                    break
                else:
                    pass
            except Exception, e:
                D.warning("Total failure during compare of bugs {{bugs}}", {"bugs": some_bugs}, e)
def test_confidential_whiteboard_is_screened(self):
    leaked_whiteboard = get(
        self.private,
        {"and": [
            {"terms": {"bug_group": SCREENED_WHITEBOARD_BUG_GROUPS}},
            {"exists": {"field": "status_whiteboard"}},
            {"not": {"terms": {"status_whiteboard": ["", "[screened]"]}}},
            {"range": {"expires_on": {"gte": NOW}}},  #CURRENT RECORDS
            {"range": {"modified_ts": {"lt": A_WHILE_AGO}}}  #OF A MINIMUM AGE
        ]},
        fields=["bug_id", "product", "component", "status_whiteboard", "bug_group", "modified_ts"],
        limit=100
    )

    if leaked_whiteboard:
        for l in leaked_whiteboard:
            l.modified_ts = CNV.datetime2string(CNV.milli2datetime(l.modified_ts))

        Log.error("Whiteboard leaking:\n{{leak}}", {"leak": leaked_whiteboard})
def test_incremental_has_correct_expires_on(self):
    # 813650, 726635 BOTH HAVE CHANGES IN 2013
    bugs = struct.wrap([813650, 726635])
    start_incremental = CNV.datetime2milli(CNV.string2datetime("2013-01-01", "%Y-%m-%d"))

    es = elasticsearch.make_test_instance("candidate", self.settings.candidate)
    with DB(self.settings.bugzilla) as db:
        #SETUP FIRST RUN PARAMETERS
        param = Struct()
        param.end_time = start_incremental
        param.start_time = 0
        param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)
        param.alias_file = self.settings.param.alias_file
        param.bug_list = bugs
        param.allow_private_bugs = False

        with ThreadedQueue(es, size=1000) as output:
            etl(db, output, param, please_stop=None)

        #SETUP INCREMENTAL RUN PARAMETERS
        param = Struct()
        param.end_time = CNV.datetime2milli(datetime.utcnow())
        param.start_time = start_incremental
        param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)
        param.alias_file = self.settings.param.alias_file
        param.bug_list = bugs
        param.allow_private_bugs = False

        with ThreadedQueue(es, size=1000) as output:
            etl(db, output, param, please_stop=None)

    for b in bugs:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    {"term": {"bug_id": b}},
                    {"range": {"expires_on": {"gte": CNV.datetime2milli(datetime.utcnow())}}}
                ]}
            }},
            "from": 0,
            "size": 200000,
            "sort": [],
            "fields": ["bug_id"]
        })

        if results.hits.total > 1:
            Log.error("Expecting only one active bug_version record")
def test_ambiguous_whiteboard_screened(self):
    GOOD_BUG_TO_TEST = 1046

    database.make_test_instance(self.settings.bugzilla)

    with DB(self.settings.bugzilla) as db:
        es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

        #MARK BUG AS ONE OF THE SCREENED GROUPS
        database.add_bug_group(db, GOOD_BUG_TO_TEST, SCREENED_WHITEBOARD_BUG_GROUPS[0])
        #MARK BUG AS ONE OF THE *NOT* SCREENED GROUPS
        database.add_bug_group(db, GOOD_BUG_TO_TEST, "not screened")
        db.flush()

        #SETUP RUN PARAMETERS
        param = Struct()
        param.end_time = CNV.datetime2milli(get_current_time(db))
        param.start_time = 0
        param.start_time_str = extract_bugzilla.milli2string(db, 0)
        param.alias_file = self.settings.param.alias_file
        param.bug_list = struct.wrap([GOOD_BUG_TO_TEST])  # bug 1046 sees lots of whiteboard, and other field, changes
        param.allow_private_bugs = True

        with ThreadedQueue(es, size=1000) as output:
            etl(db, output, param, please_stop=None)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        versions = get_all_bug_versions(es, GOOD_BUG_TO_TEST)

        for v in versions:
            if v.status_whiteboard not in (None, "", "[screened]"):
                Log.error("Expecting whiteboard to be screened")
def replicate(source, destination, pending, last_updated):
    """
    COPY source RECORDS TO destination
    """
    for g, bugs in Q.groupby(pending, max_size=BATCH_SIZE):
        with Timer("Replicate {{num_bugs}} bug versions", {"num_bugs": len(bugs)}):
            data = source.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"terms": {"bug_id": set(bugs)}},
                        {"range": {"modified_ts": {"gte": CNV.datetime2milli(last_updated)}}}
                    ]}
                }},
                "from": 0,
                "size": 200000,
                "sort": []
            })

            d2 = map(
                lambda x: {"id": x.id, "value": x},
                map(
                    lambda x: transform_bugzilla.normalize(transform_bugzilla.rename_attachments(x._source), old_school=True),
                    data.hits.hits
                )
            )
            destination.extend(d2)
def compare_both(candidate, reference, settings, some_bugs):
    File(settings.param.errors).delete()
    found_errors = False  # INITIALIZE ONCE, SO A DIFF IN ANY BUG IS REPORTED
    for bug_id in some_bugs:
        versions = Q.sort(
            get_all_bug_versions(candidate, bug_id, datetime.utcnow()),
            "modified_ts")
        # WE CAN NOT EXPECT candidate TO BE UP TO DATE BECAUSE IT IS USING AN OLD IMAGE
        if len(versions) == 0:
            max_time = datetime.utcnow()
        else:
            max_time = CNV.milli2datetime(versions[-1].modified_ts)

        ref_versions = Q.sort(
            map(compare_es.old2new, get_all_bug_versions(reference, bug_id, max_time)),
            "modified_ts")

        can = json.dumps(json_scrub(versions), indent=4, sort_keys=True, separators=(',', ': '))
        ref = json.dumps(json_scrub(ref_versions), indent=4, sort_keys=True, separators=(',', ': '))
        if can != ref:
            found_errors = True
            File(settings.param.errors + "/try/" + str(bug_id) + ".txt").write(can)
            File(settings.param.errors + "/exp/" + str(bug_id) + ".txt").write(ref)

    return found_errors
def test_specific_bugs(self):
    """
    USE A MYSQL DATABASE TO FILL AN ES INSTANCE (USE Fake_ES() INSTANCES TO KEEP
    THIS TEST LOCAL) WITH VERSIONS OF BUGS FROM settings.param.bugs.  COMPARE
    THOSE VERSIONS TO A REFERENCE ES (ALSO CHECKED INTO REPOSITORY)
    """
    # settings.param.allow_private_bugs = True
    database.make_test_instance(self.settings.bugzilla)

    with DB(self.settings.bugzilla) as db:
        candidate = elasticsearch.make_test_instance("candidate", self.settings.candidate)
        reference = elasticsearch.open_test_instance("reference", self.settings.private_bugs_reference)

        #SETUP RUN PARAMETERS
        param = Struct()
        param.end_time = CNV.datetime2milli(get_current_time(db))
        param.start_time = 0
        param.start_time_str = extract_bugzilla.milli2string(db, 0)
        param.alias_file = self.settings.param.alias_file
        param.bug_list = self.settings.param.bugs
        param.allow_private_bugs = self.settings.param.allow_private_bugs

        with ThreadedQueue(candidate, size=1000) as output:
            etl(db, output, param, please_stop=None)

        #COMPARE ALL BUGS
        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        compare_both(candidate, reference, self.settings, self.settings.param.bugs)
def main(settings):
    #MAKE HANDLES TO CONTAINERS
    with DB(settings.bugzilla) as db:
        #REAL ES
        # if settings.candidate.alias is None:
        #     settings.candidate.alias = settings.candidate.index
        #     settings.candidate.index = settings.candidate.alias + CNV.datetime2string(datetime.utcnow(), "%Y%m%d_%H%M%S")
        # candidate = ElasticSearch.create_index(settings.candidate, File(settings.candidate.schema_file).read())
        candidate = Fake_ES(settings.fake_es)
        reference = ElasticSearch(settings.reference)

        #SETUP RUN PARAMETERS
        param = Struct()
        param.BUGS_TABLE_COLUMNS = get_bugs_table_columns(db, settings.bugzilla.schema)
        param.BUGS_TABLE_COLUMNS_SQL = SQL(",\n".join(["`" + c.column_name + "`" for c in param.BUGS_TABLE_COLUMNS]))
        param.BUGS_TABLE_COLUMNS = Q.select(param.BUGS_TABLE_COLUMNS, "column_name")
        param.END_TIME = CNV.datetime2milli(datetime.utcnow())
        param.START_TIME = 0
        param.alias_file = settings.param.alias_file
        param.BUG_IDS_PARTITION = SQL("bug_id in {{bugs}}", {"bugs": db.quote(settings.param.bugs)})

        etl(db, candidate, param)

        #COMPARE ALL BUGS
        compare_both(candidate, reference, settings, settings.param.bugs)
def __init__(self, settings):
    self.settings = wrap({"host": "fake", "index": "fake"})
    self.filename = settings.filename
    try:
        self.data = CNV.JSON2object(File(self.filename).read())
    except IOError:
        self.data = Struct()
def diff(db, table, old_record, new_record):
    """
    UPDATE bugs_activity WITH THE CHANGES IN RECORDS
    """
    now = milli2string(db, CNV.datetime2milli(get_current_time(db)))
    changed = set(old_record.keys()) ^ set(new_record.keys())
    changed |= set([k for k, v in old_record.items() if v != new_record[k]])

    if table != u"bugs":
        prefix = table + u"."
    else:
        prefix = u""

    for c in changed:
        fieldid = db.query("SELECT id FROM fielddefs WHERE name={{field_name}}", {"field_name": prefix + c})[0].id
        if fieldid == None:
            Log.error("Expecting a valid field name")

        activity = Struct(
            bug_id=old_record.bug_id,
            who=1,
            bug_when=now,
            fieldid=fieldid,
            removed=old_record[c],
            added=new_record[c],
            attach_id=old_record.attach_id,
            comment_id=old_record.comment_id
        )
        db.insert("bugs_activity", activity)

    db.execute("UPDATE bugs SET delta_ts={{now}} WHERE {{where}}", {
        "now": now,
        "where": esfilter2sqlwhere(db, {"term": {"bug_id": old_record.bug_id}})
    })
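# The changed-field computation in diff() is a symmetric difference of key sets,
# plus any shared key whose value differs; a plain-dict illustration (Struct
# returns Null for missing keys, so .get() stands in for new_record[k] here):
old = {"status": "NEW", "priority": "P1"}
new = {"status": "FIXED", "resolution": "FIXED"}
changed = set(old.keys()) ^ set(new.keys())                   # {"priority", "resolution"}
changed |= set(k for k, v in old.items() if v != new.get(k))  # ADDS "status"
assert changed == {"status", "priority", "resolution"}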
def get_pending(es, since):
    result = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"range": {"modified_ts": {"gte": CNV.datetime2milli(since)}}}
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"default": {"terms": {"field": "bug_id", "size": 200000}}}
    })

    if len(result.facets.default.terms) >= 200000:
        Log.error("Can not handle more than 200K bugs changed")

    pending_bugs = Multiset(
        result.facets.default.terms,
        key_field="term",
        count_field="count"
    )
    Log.note("Source has {{num}} bug versions for updating", {"num": len(pending_bugs)})
    return pending_bugs
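# The terms facet returns one {"term": bug_id, "count": n} entry per changed bug,
# and Multiset(terms, key_field="term", count_field="count") turns that into a bag
# of bug_ids weighted by version count, so len(pending_bugs) reports bug *versions*
# (as the Log.note wording says), not distinct bugs.  With the hypothetical facet
# result below, it would report 4:
example_terms = [{"term": 813650, "count": 3}, {"term": 726635, "count": 1}]  # NOT REAL DATA
assert sum(t["count"] for t in example_terms) == 4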
def get_all_bug_versions(es, bug_id, max_time):
    data = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [
                {"term": {"bug_id": bug_id}},
                {"range": {"modified_ts": {"lte": CNV.datetime2milli(max_time)}}}
            ]}
        }},
        "from": 0,
        "size": 200000,
        "sort": []
    })

    return Q.select(data.hits.hits, "_source")
def test_whiteboard_screened(self):
    GOOD_BUG_TO_TEST = 1046

    database.make_test_instance(self.settings.bugzilla)

    with DB(self.settings.bugzilla) as db:
        es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

        #MARK BUG AS ONE OF THE SCREENED GROUPS
        database.add_bug_group(db, GOOD_BUG_TO_TEST, SCREENED_WHITEBOARD_BUG_GROUPS[0])
        db.flush()

        #SETUP RUN PARAMETERS
        param = Struct()
        param.end_time = CNV.datetime2milli(get_current_time(db))
        param.start_time = 0
        param.start_time_str = extract_bugzilla.milli2string(db, 0)
        param.alias_file = self.settings.param.alias_file
        param.bug_list = struct.wrap([GOOD_BUG_TO_TEST])  # bug 1046 sees lots of whiteboard, and other field, changes
        param.allow_private_bugs = True

        with ThreadedQueue(es, size=1000) as output:
            etl(db, output, param, please_stop=None)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        versions = get_all_bug_versions(es, GOOD_BUG_TO_TEST)

        for v in versions:
            if v.status_whiteboard not in (None, "", "[screened]"):
                Log.error("Expecting whiteboard to be screened")
def search(self, query):
    query = wrap(query)
    f = CNV.esfilter2where(query.query.filtered.filter)
    filtered = wrap([{"_id": i, "_source": d} for i, d in self.data.items() if f(d)])
    if query.fields:
        return wrap({"hits": {"total": len(filtered), "hits": [
            {"_id": d._id, "fields": unwrap(Q.select([unwrap(d._source)], query.fields)[0])}
            for d in filtered
        ]}})
    else:
        return wrap({"hits": {"total": len(filtered), "hits": filtered}})
def get_current_time(db):
    """
    RETURN GMT TIME
    """
    output = db.query(u"""
        SELECT UNIX_TIMESTAMP(now()) `value`
    """)[0].value
    return CNV.unix2datetime(output)
def old2new(bug):
    #THESE ARE KNOWN CHANGES THAT SHOULD BE MADE TO THE PRODUCTION VERSION
    bug.id = bug._id.replace(".", "_")[:-3]
    bug._id = None
    if bug.everconfirmed is not None:
        bug.everconfirmed = int(bug.everconfirmed)
    if bug.votes is not None:
        bug.votes = int(bug.votes)
    bug.dupe_by = CNV.value2intlist(bug.dupe_by)
    if bug.votes == 0:
        del bug["votes"]
    if Math.is_integer(bug.remaining_time) and int(bug.remaining_time) == 0:
        del bug["remaining_time"]
    if bug.cf_due_date is not None:
        bug.cf_due_date = CNV.datetime2milli(CNV.string2datetime(bug.cf_due_date, "%Y-%m-%d"))
    if bug.everconfirmed == 0:
        del bug["everconfirmed"]

    try:
        bug.cf_last_resolved = CNV.datetime2milli(CNV.string2datetime(bug.cf_last_resolved, "%Y-%m-%d %H:%M:%S"))
    except Exception, e:
        pass

    return bug  # CALLERS map() THIS FUNCTION, SO THE (MUTATED) BUG MUST BE RETURNED
def stop(cls):
    if cls.profiler:
        from bzETL.util.cnv import CNV
        from bzETL.util.env.files import File

        p = pstats.Stats(cls.profiler)
        stats = [{
            "num_calls": d[1],
            "self_time": d[2],
            "total_time": d[3],
            "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
            "line": f[1],
            "method": f[2].lstrip("<").rstrip(">")
        }
            for f, d in p.stats.iteritems()
        ]
        File("profile.tab").write(CNV.list2tab(stats))

    cls.main_log.stop()
def test_replication():
    try:
        settings = startup.read_settings(filename="replication_settings.json")
        Log.start(settings.debug)

        source = ElasticSearch(settings.source)
        destination = replicate.get_or_create_index(settings["destination"], source)

        replicate.replicate(source, destination, [537285], CNV.string2datetime("19900101", "%Y%m%d"))
    finally:
        Log.stop()
def compare_both(candidate, reference, settings, some_bugs):
    File(settings.param.errors).delete()
    try_dir = settings.param.errors + "/try/"
    ref_dir = settings.param.errors + "/ref/"

    with Timer("Comparing to reference"):
        found_errors = False
        for bug_id in some_bugs:
            try:
                versions = Q.sort(
                    get_all_bug_versions(candidate, bug_id, datetime.utcnow()),
                    "modified_ts")
                # WE CAN NOT EXPECT candidate TO BE UP TO DATE BECAUSE IT IS USING AN OLD IMAGE
                if not versions:
                    max_time = CNV.milli2datetime(settings.bugzilla.expires_on)
                else:
                    max_time = CNV.milli2datetime(versions.last().modified_ts)

                pre_ref_versions = get_all_bug_versions(reference, bug_id, max_time)
                ref_versions = Q.sort(
                    #ADDED TO FIX OLD PRODUCTION BUG VERSIONS
                    [compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
                    "modified_ts"
                )

                can = CNV.object2JSON(versions, pretty=True)
                ref = CNV.object2JSON(ref_versions, pretty=True)
                if can != ref:
                    found_errors = True
                    File(try_dir + unicode(bug_id) + ".txt").write(can)
                    File(ref_dir + unicode(bug_id) + ".txt").write(ref)
            except Exception, e:
                found_errors = True
                Log.warning("Problem ETL'ing bug {{bug_id}}", {"bug_id": bug_id}, e)

    if found_errors:
        Log.error("DIFFERENCES FOUND (Differences shown in {{path}})", {"path": [try_dir, ref_dir]})
def extend(self, records):
    """
    JUST SO WE MODEL A Queue
    """
    records = {v["id"]: v["value"] for v in records}

    struct.unwrap(self.data).update(records)

    data_as_json = CNV.object2JSON(self.data, pretty=True)

    File(self.filename).write(data_as_json)
    Log.note("{{num}} items added", {"num": len(records)})
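# Records passed to extend() follow the same {"id": ..., "value": ...} convention
# the bulk loaders build elsewhere (see extract_from_file and replicate); a
# hypothetical call against a Fake_ES instance:
#
#     fake_es.extend([{"id": "813650_0", "value": {"bug_id": 813650, "modified_ts": 0}}])
#
# Note that every call rewrites the whole self.data snapshot to the backing file.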
def old2new(bug, max_date):
    """
    CONVERT THE OLD ES FORMAT TO THE NEW
    THESE ARE KNOWN CHANGES THAT SHOULD BE MADE TO THE PRODUCTION VERSION
    """
    if bug.everconfirmed != None:
        if bug.everconfirmed == "":
            bug.everconfirmed = None
        else:
            bug.everconfirmed = int(bug.everconfirmed)

    bug = CNV.JSON2object(CNV.object2JSON(bug).replace("bugzilla: other b.m.o issues ", "bugzilla: other b.m.o issues"))

    if bug.expires_on > max_date:
        bug.expires_on = parse_bug_history.MAX_TIME
    if bug.votes != None:
        bug.votes = int(bug.votes)
    bug.dupe_by = CNV.value2intlist(bug.dupe_by)
    if bug.votes == 0:
        del bug["votes"]
    # if Math.is_integer(bug.remaining_time) and int(bug.remaining_time) == 0:
    #     bug.remaining_time = 0
    if bug.cf_due_date != None and not Math.is_number(bug.cf_due_date):
        bug.cf_due_date = CNV.datetime2milli(
            CNV.string2datetime(bug.cf_due_date, "%Y-%m-%d")
        )
    bug.changes = CNV.JSON2object(
        CNV.object2JSON(Q.sort(bug.changes, "field_name"))
        .replace("\"field_value_removed\":", "\"old_value\":")
        .replace("\"field_value\":", "\"new_value\":")
    )

    if bug.everconfirmed == 0:
        del bug["everconfirmed"]
    if bug.id == "692436_1336314345":
        bug.votes = 3

    try:
        if Math.is_number(bug.cf_last_resolved):
            bug.cf_last_resolved = long(bug.cf_last_resolved)
        else:
            bug.cf_last_resolved = CNV.datetime2milli(CNV.string2datetime(bug.cf_last_resolved, "%Y-%m-%d %H:%M:%S"))
    except Exception, e:
        pass

    return bug  # THE SERIALIZE ROUND TRIP REBINDS bug, SO CALLERS NEED THE RESULT
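# The serialize-replace-parse round trip above renames the keys inside every
# change record; a hypothetical entry
#
#     {"field_name": "status", "field_value": "FIXED", "field_value_removed": "NEW"}
#
# comes back as
#
#     {"field_name": "status", "new_value": "FIXED", "old_value": "NEW"}
#
# (the trailing '":' in each pattern keeps "field_value": from matching inside
# "field_value_removed":).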
def analysis(settings, last_run, please_stop):
    DIFF = 7
    if last_run:
        DIFF = 4  #ONCE WE HAVE ALL THE DATA IN WE CAN BE LESS DISCRIMINATING
    try_again = True

    while try_again and not please_stop:
        #FIND EMAIL MOST NEEDING REPLACEMENT
        problem_agg = Multiset(allow_negative=True)
        for bug_id, agg in bugs.iteritems():
            #ONLY COUNT NEGATIVE EMAILS
            for email, count in agg.dic.iteritems():
                if count < 0:
                    problem_agg.add(alias(email), amount=count)

        problems = Q.sort([
            {"email": e, "count": c}
            for e, c in problem_agg.dic.iteritems()
            if not aliases.get(e, Null).ignore and (c <= -(DIFF / 2) or last_run)
        ], ["count", "email"])

        try_again = False
        for problem in problems:
            if please_stop:
                break

            #FIND MOST LIKELY MATCH
            solution_agg = Multiset(allow_negative=True)
            for bug_id, agg in bugs.iteritems():
                if agg.dic.get(problem.email, 0) < 0:  #ONLY BUGS THAT ARE EXPERIENCING THIS problem
                    solution_agg += agg
            solutions = Q.sort([{"email": e, "count": c} for e, c in solution_agg.dic.iteritems()], [{"field": "count", "sort": -1}, "email"])

            if last_run and len(solutions) == 2 and solutions[0].count == -solutions[1].count:
                #exact match
                pass
            elif len(solutions) <= 1 or (solutions[1].count + DIFF >= solutions[0].count):
                #not distinctive enough
                continue

            best_solution = solutions[0]
            Log.note("{{problem}} ({{score}}) -> {{solution}} {{matches}}", {
                "problem": problem.email,
                "score": problem.count,
                "solution": best_solution.email,
                "matches": CNV.object2JSON(Q.select(solutions, "count")[:10:])
            })
            try_again = True
            add_alias(problem.email, best_solution.email)

    saveAliases(settings)
def milli2datetime(r):
    """
    CONVERT ANY longs INTO TIME STRINGS
    """
    try:
        if r == None:
            return None
        elif isinstance(r, basestring):
            return r
        elif Math.is_number(r):
            if CNV.value2number(r) > 800000000000:
                return CNV.datetime2string(CNV.milli2datetime(r), "%Y-%m-%d %H:%M:%S")
            else:
                return r
        elif isinstance(r, dict):
            output = {}
            for k, v in r.items():
                v = milli2datetime(v)
                if v != None:
                    output[k.lower()] = v
            return output
        elif hasattr(r, '__iter__'):
            output = []
            for v in r:
                v = milli2datetime(v)
                if v != None:
                    output.append(v)
            if not output:
                return None
            try:
                return Q.sort(output)
            except Exception:
                return output
        else:
            return r
    except Exception, e:
        Log.warning("Can not scrub: {{json}}", {"json": r}, e)
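# Why the 800000000000 cutoff: that many milliseconds past the epoch lands in
# May 1995, so any larger number is assumed to be a timestamp in milliseconds
# rather than an ordinary numeric field.  A plain-datetime check of the threshold:
from datetime import datetime
assert datetime.utcfromtimestamp(800000000000 / 1000.0) == datetime(1995, 5, 9, 6, 13, 20)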
def get_last_updated(es):
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"range": {"modified_ts": {"gte": CNV.datetime2milli(far_back)}}}
            }},
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {"0": {"statistical": {"field": "modified_ts"}}}
        })

        if results.facets["0"].count == 0:
            return datetime.min
        return CNV.milli2datetime(results.facets["0"].max)
    except Exception, e:
        Log.error("Can not get_last_updated from {{host}}/{{index}}", {
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
def main(settings):
    #USE A FILE
    if settings.source.filename != None:
        settings.destination.alias = settings.destination.index
        settings.destination.index = ElasticSearch.proto_name(settings.destination.alias)
        schema = CNV.JSON2object(File(settings.source.schema_filename).read())
        if transform_bugzilla.USE_ATTACHMENTS_DOT:
            schema = CNV.JSON2object(CNV.object2JSON(schema).replace("attachments_", "attachments."))

        dest = ElasticSearch.create_index(settings.destination, schema, limit_replicas=True)
        dest.set_refresh_interval(-1)
        extract_from_file(settings.source, dest)
        dest.set_refresh_interval(1)

        dest.delete_all_but(settings.destination.alias, settings.destination.index)
        dest.add_alias(settings.destination.alias)
        return

    # SYNCH WITH source ES INDEX
    source = ElasticSearch(settings.source)
    destination = get_or_create_index(settings["destination"], source)

    # GET LAST UPDATED
    time_file = File(settings.param.last_replication_time)
    from_file = None
    if time_file.exists:
        from_file = CNV.milli2datetime(CNV.value2int(time_file.read()))
    from_es = get_last_updated(destination)
    last_updated = nvl(MIN(from_file, from_es), CNV.milli2datetime(0))
    current_time = datetime.utcnow()

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED
    time_file.write(unicode(CNV.datetime2milli(current_time)))
def open_test_instance(name, settings):
    if settings.filename:
        Log.note("Using {{filename}} as {{type}}", {
            "filename": settings.filename,
            "type": name
        })
        return Fake_ES(settings)
    else:
        Log.note("Using ES cluster at {{host}} as {{type}}", {
            "host": settings.host,
            "type": name
        })
        ElasticSearch.delete_index(settings)

        schema = CNV.JSON2object(File(settings.schema_file).read(), flexible=True, paths=True)
        es = ElasticSearch.create_index(settings, schema, limit_replicas=True)
        return es
def extract_from_file(source_settings, destination):
    with File(source_settings.filename) as handle:
        for g, d in Q.groupby(handle, size=BATCH_SIZE):
            try:
                d2 = map(
                    lambda x: {"id": x.id, "value": x},
                    map(
                        lambda x: transform_bugzilla.normalize(CNV.JSON2object(fix_json(x))),
                        d
                    )
                )
                destination.add(d2)
            except Exception, e:
                filename = "Error_" + unicode(g) + ".txt"
                File(filename).write(d)
                Log.warning("Can not convert block {{block}} (file={{filename}})", {
                    "block": g,
                    "filename": filename
                }, e)
def test_specific_bugs(self):
    """
    USE A MYSQL DATABASE TO FILL AN ES INSTANCE (USE Fake_ES() INSTANCES TO KEEP
    THIS TEST LOCAL) WITH VERSIONS OF BUGS FROM settings.param.bugs.
    """
    with DB(self.settings.bugzilla) as db:
        candidate = elasticsearch.make_test_instance("candidate", self.settings.elasticsearch)

        #SETUP RUN PARAMETERS
        param = Struct()
        param.end_time = CNV.datetime2milli(get_current_time(db))
        param.start_time = 0
        param.start_time_str = extract_bugzilla.milli2string(db, 0)
        param.alias_file = self.settings.param.alias_file
        param.bug_list = self.settings.param.bugs
        param.allow_private_bugs = self.settings.param.allow_private_bugs

        with ThreadedQueue(candidate, size=1000) as output:
            etl(db, output, param, please_stop=None)
def delete_record(self, filter):
    f = CNV.esfilter2where(filter)
    self.data = wrap({k: v for k, v in self.data.items() if not f(v)})
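# CNV.esfilter2where compiles an ES-style filter into a predicate over stored
# documents, so a hypothetical
#
#     fake_es.delete_record({"term": {"bug_id": 813650}})
#
# drops every stored version of that bug from the in-memory copy.  Unlike
# extend(), nothing is written back to the backing file here.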