def diff(bms, loops, track, old, new):
    """Diff benchmark results between the 'old' and 'new' builds.

    Args:
        bms: iterable of benchmark binary names.
        loops: number of measurement loops that were run per benchmark.
        track: fields (metric names) to track/report.
        old: label of the baseline build (also the directory suffix of the
            'bm_diff_<old>/opt/<bm>' binaries queried for the test list).
        new: label of the candidate build.

    Returns:
        (table, note): `table` is a tabulate-formatted string of per-benchmark
        rows (or None if nothing significant), `note` describes corrupt or
        missing JSON files encountered while reading results.
    """
    benchmarks = collections.defaultdict(Benchmark)
    # _read_json records failures into these dicts as a side effect:
    # unparseable files land in badjson_files, absent ones in nonexistant_files.
    badjson_files = {}
    nonexistant_files = {}
    for bm in bms:
        for loop in range(0, loops):
            # Ask the old binary for its test list; each test name becomes
            # part of the result-file name after sanitizing path-hostile chars.
            # NOTE(review): under Python 3 check_output returns bytes, so the
            # str-argument .replace calls below would raise TypeError —
            # presumably this version runs under Python 2; confirm.
            for line in subprocess.check_output(
                ['bm_diff_%s/opt/%s' % (old, bm),
                 '--benchmark_list_tests']).splitlines():
                stripped_line = line.strip().replace("/", "_").replace(
                    "<", "_").replace(">", "_").replace(", ", "_")
                js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' %
                                        (bm, stripped_line, new, loop),
                                        badjson_files, nonexistant_files)
                js_new_opt = _read_json('%s.%s.opt.%s.%d.json' %
                                        (bm, stripped_line, new, loop),
                                        badjson_files, nonexistant_files)
                js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' %
                                        (bm, stripped_line, old, loop),
                                        badjson_files, nonexistant_files)
                js_old_opt = _read_json('%s.%s.opt.%s.%d.json' %
                                        (bm, stripped_line, old, loop),
                                        badjson_files, nonexistant_files)
                if js_new_ctr:
                    for row in bm_json.expand_json(js_new_ctr, js_new_opt):
                        name = row['cpp_name']
                        # Skip aggregate rows; only raw samples are collected.
                        if name.endswith('_mean') or name.endswith('_stddev'):
                            continue
                        benchmarks[name].add_sample(track, row, True)
                if js_old_ctr:
                    for row in bm_json.expand_json(js_old_ctr, js_old_opt):
                        name = row['cpp_name']
                        if name.endswith('_mean') or name.endswith('_stddev'):
                            continue
                        benchmarks[name].add_sample(track, row, False)
    # Ask each Benchmark which tracked fields changed interestingly.
    really_interesting = set()
    for name, bm in benchmarks.items():
        _maybe_print(name)
        really_interesting.update(bm.process(track, new, old))
    # Preserve the caller-specified ordering of `track` in the output columns.
    fields = [f for f in track if f in really_interesting]
    headers = ['Benchmark'] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    note = 'Corrupt JSON data (indicates timeout or crash) = %s' % str(
        badjson_files)
    note += '\n\nMissing files (new benchmark) = %s' % str(nonexistant_files)
    if rows:
        return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note
    else:
        return None, note
def finalize():
    """Aggregate per-loop benchmark JSON dumps, diff old vs. new, and post
    the result as a PR comment.

    Reads '<bm>.{counters,opt}.{new,old}.<loop>.json' files for every
    benchmark/loop configured on the module-level `args`, accumulates raw
    samples per benchmark name, tabulates the interesting differences, and
    sends the table (or a "no differences" message) to comment_on_pr.
    """
    benchmarks = collections.defaultdict(Benchmark)
    for bm in args.benchmarks:
        for loop in range(0, args.loops):
            js_new_ctr = read_json('%s.counters.new.%d.json' % (bm, loop))
            js_new_opt = read_json('%s.opt.new.%d.json' % (bm, loop))
            js_old_ctr = read_json('%s.counters.old.%d.json' % (bm, loop))
            js_old_opt = read_json('%s.opt.old.%d.json' % (bm, loop))
            if js_new_ctr:
                for row in bm_json.expand_json(js_new_ctr, js_new_opt):
                    # Fix: `print row` is Python-2-only statement syntax;
                    # the parenthesized call works under both 2 and 3.
                    print(row)
                    name = row['cpp_name']
                    # Skip aggregate rows; only raw samples are collected.
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(row, True)
            if js_old_ctr:
                for row in bm_json.expand_json(js_old_ctr, js_old_opt):
                    print(row)
                    name = row['cpp_name']
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(row, False)
    really_interesting = set()
    for name, bm in benchmarks.items():
        print(name)
        really_interesting.update(bm.process())
    # Preserve the ordering of args.track in the output columns.
    fields = [f for f in args.track if f in really_interesting]
    headers = ['Benchmark'] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    if rows:
        text = 'Performance differences noted:\n' + tabulate.tabulate(
            rows, headers=headers, floatfmt='+.2f')
    else:
        text = 'No significant performance differences'
    print(text)
    comment_on_pr.comment_on_pr('```\n%s\n```' % text)
def finalize():
    """Aggregate per-loop benchmark JSON dumps, diff old vs. new, and post
    the result as a PR comment.

    Reads '<bm>.{counters,opt}.{new,old}.<loop>.json' files for every
    benchmark/loop configured on the module-level `args`, accumulates raw
    samples per benchmark name, tabulates the interesting differences, and
    sends the table (or a "no differences" message) to comment_on_pr.
    """
    benchmarks = collections.defaultdict(Benchmark)
    for bm in args.benchmarks:
        for loop in range(0, args.loops):
            js_new_ctr = read_json('%s.counters.new.%d.json' % (bm, loop))
            js_new_opt = read_json('%s.opt.new.%d.json' % (bm, loop))
            js_old_ctr = read_json('%s.counters.old.%d.json' % (bm, loop))
            js_old_opt = read_json('%s.opt.old.%d.json' % (bm, loop))
            if js_new_ctr:
                for row in bm_json.expand_json(js_new_ctr, js_new_opt):
                    # Fix: `print row` is Python-2-only statement syntax;
                    # the parenthesized call works under both 2 and 3.
                    print(row)
                    name = row['cpp_name']
                    # Skip aggregate rows; only raw samples are collected.
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(row, True)
            if js_old_ctr:
                for row in bm_json.expand_json(js_old_ctr, js_old_opt):
                    print(row)
                    name = row['cpp_name']
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(row, False)
    really_interesting = set()
    for name, bm in benchmarks.items():
        print(name)
        really_interesting.update(bm.process())
    # Preserve the ordering of args.track in the output columns.
    fields = [f for f in args.track if f in really_interesting]
    headers = ['Benchmark'] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    if rows:
        text = 'Performance differences noted:\n' + tabulate.tabulate(
            rows, headers=headers, floatfmt='+.2f')
    else:
        text = 'No significant performance differences'
    print(text)
    comment_on_pr.comment_on_pr('```\n%s\n```' % text)
'float': float, 'boolean': bool, 'string': str, 'timestamp': str,
}
# NOTE(review): the opening of this sanitizer dict (and its name — presumably
# SANITIZE, mapping SQL type name -> Python conversion callable) is outside
# this chunk; confirm against the full file.

# --schema mode: print a BigQuery-style "name:TYPE" column schema and exit.
if sys.argv[1] == '--schema':
    print(',\n'.join('%s:%s' % (k, t.upper()) for k, t in columns))
    sys.exit(0)

# Load the primary benchmark JSON; the optional second argv is a companion
# JSON passed through to bm_json.expand_json (e.g. a second result set).
with open(sys.argv[1]) as f:
    js = json.loads(f.read())
if len(sys.argv) > 2:
    with open(sys.argv[2]) as f:
        js2 = json.loads(f.read())
else:
    js2 = None

# TODO(jtattermusch): write directly to a file instead of stdout
writer = csv.DictWriter(sys.stdout, [c for c, t in columns])
for row in bm_json.expand_json(js, js2):
    sane_row = {}
    for name, sql_type in columns:
        if name in row:
            # Drop empty-string values instead of emitting them in the CSV.
            if row[name] == '':
                continue
            # Coerce to the column's declared type before writing.
            sane_row[name] = SANITIZE[sql_type](row[name])
    writer.writerow(sane_row)
'integer': int, 'float': float, 'boolean': bool, 'string': str, 'timestamp': str, } if sys.argv[1] == '--schema': print ',\n'.join('%s:%s' % (k, t.upper()) for k, t in columns) sys.exit(0) with open(sys.argv[1]) as f: js = json.loads(f.read()) if len(sys.argv) > 2: with open(sys.argv[2]) as f: js2 = json.loads(f.read()) else: js2 = None # TODO(jtattermusch): write directly to a file instead of stdout writer = csv.DictWriter(sys.stdout, [c for c, t in columns]) for row in bm_json.expand_json(js, js2): sane_row = {} for name, sql_type in columns: if name in row: if row[name] == '': continue sane_row[name] = SANITIZE[sql_type](row[name]) writer.writerow(sane_row)
help='files to diff. ')
# NOTE(review): the argparse setup (argp creation and the start of this
# add_argument call) is outside this chunk; confirm against the full file.
args = argp.parse_args()

# args.files is expected as four JSON paths, in order:
# new counters, new opt, old counters, old opt.
with open(args.files[0]) as f:
    js_new_ctr = json.loads(f.read())
with open(args.files[1]) as f:
    js_new_opt = json.loads(f.read())
with open(args.files[2]) as f:
    js_old_ctr = json.loads(f.read())
with open(args.files[3]) as f:
    js_old_opt = json.loads(f.read())

# Index expanded rows by their C++ benchmark name for old/new lookup.
new = {}
old = {}
for row in bm_json.expand_json(js_new_ctr, js_new_opt):
    new[row['cpp_name']] = row
for row in bm_json.expand_json(js_old_ctr, js_old_opt):
    old[row['cpp_name']] = row

# For each tracked field, scan benchmarks present in both runs; the first
# benchmark whose old/new values satisfy the field's _INTERESTING predicate
# marks that field as changed (note: the predicate itself is appended
# alongside the field name, and the scan stops at the first hit).
changed = []
for fld in args.track:
    chk = _INTERESTING[fld]
    for bm in new.keys():
        if bm not in old:
            continue
        n = new[bm]
        o = old[bm]
        if fld not in n or fld not in o:
            continue
        if chk(n[fld], o[fld]):
            changed.append((fld, chk))
            break
def diff(bms, loops, regex, track, old, new, counters):
    """Diff benchmark results between the 'old' and 'new' builds.

    Args:
        bms: iterable of benchmark binary names.
        loops: number of measurement loops that were run per benchmark.
        regex: --benchmark_filter regex restricting which tests are listed.
        track: fields (metric names) to track/report.
        old: label of the baseline build (also the directory suffix of the
            'bm_diff_<old>/opt/<bm>' binaries queried for the test list).
        new: label of the candidate build.
        counters: when falsy, the '.counters.' JSON files are not read and
            None is passed to bm_json.expand_json in their place.

    Returns:
        (table, note, significance): tabulate-formatted table (or None),
        a note about corrupt/missing files (or None), and an integer in
        [-3, 3] grading how large the 95th-percentile cpu_time speedup is
        (sign gives direction; 0 when no rows).
    """
    benchmarks = collections.defaultdict(Benchmark)
    # _read_json records failures into these dicts as a side effect:
    # unparseable files land in badjson_files, absent ones in nonexistant_files.
    badjson_files = {}
    nonexistant_files = {}
    for bm in bms:
        for loop in range(0, loops):
            # Ask the old binary for its (filtered) test list; each name
            # becomes part of a result-file name after sanitizing
            # path-hostile characters.
            for line in subprocess.check_output([
                    'bm_diff_%s/opt/%s' % (old, bm),
                    '--benchmark_list_tests',
                    '--benchmark_filter=%s' % regex
            ]).splitlines():
                # check_output yields bytes under Python 3; decode first.
                line = line.decode('UTF-8')
                stripped_line = line.strip().replace("/", "_").replace(
                    "<", "_").replace(">", "_").replace(", ", "_")
                js_new_opt = _read_json(
                    '%s.%s.opt.%s.%d.json' % (bm, stripped_line, new, loop),
                    badjson_files, nonexistant_files)
                js_old_opt = _read_json(
                    '%s.%s.opt.%s.%d.json' % (bm, stripped_line, old, loop),
                    badjson_files, nonexistant_files)
                if counters:
                    js_new_ctr = _read_json(
                        '%s.%s.counters.%s.%d.json' %
                        (bm, stripped_line, new, loop), badjson_files,
                        nonexistant_files)
                    js_old_ctr = _read_json(
                        '%s.%s.counters.%s.%d.json' %
                        (bm, stripped_line, old, loop), badjson_files,
                        nonexistant_files)
                else:
                    js_new_ctr = None
                    js_old_ctr = None
                for row in bm_json.expand_json(js_new_ctr, js_new_opt):
                    name = row['cpp_name']
                    # Skip aggregate rows; only raw samples are collected.
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(track, row, True)
                for row in bm_json.expand_json(js_old_ctr, js_old_opt):
                    name = row['cpp_name']
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(track, row, False)
    # Ask each Benchmark which tracked fields changed interestingly.
    really_interesting = set()
    for name, bm in benchmarks.items():
        _maybe_print(name)
        really_interesting.update(bm.process(track, new, old))
    # Preserve the caller-specified ordering of `track` in the output columns.
    fields = [f for f in track if f in really_interesting]

    # figure out the significance of the changes... right now we take the
    # 95%-ile benchmark delta %-age, and then apply some hand chosen
    # thresholds
    histogram = []
    for bm in benchmarks.values():
        if bm.skip():
            continue
        d = bm.speedup['cpu_time']
        if d is None:
            continue
        histogram.append(d)
    histogram.sort()
    print("histogram of speedups: ", histogram)
    if len(histogram) == 0:
        significance = 0
    else:
        # 95th-percentile delta; sign is factored out so the magnitude
        # thresholds below apply symmetrically to speedups and slowdowns.
        delta = histogram[int(len(histogram) * 0.95)]
        mul = 1
        if delta < 0:
            delta = -delta
            mul = -1
        if delta < 2:
            significance = 0
        elif delta < 5:
            significance = 1
        elif delta < 10:
            significance = 2
        else:
            significance = 3
        significance *= mul

    headers = ['Benchmark'] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    note = None
    if len(badjson_files):
        note = 'Corrupt JSON data (indicates timeout or crash): \n%s' % fmt_dict(
            badjson_files)
    if len(nonexistant_files):
        if note:
            note += '\n\nMissing files (indicates new benchmark): \n%s' % fmt_dict(
                nonexistant_files)
        else:
            # NOTE(review): leading '\n\n' looks unintended when this is the
            # only note — kept as-is to preserve behavior.
            note = '\n\nMissing files (indicates new benchmark): \n%s' % fmt_dict(
                nonexistant_files)
    if rows:
        return tabulate.tabulate(rows, headers=headers,
                                 floatfmt='+.2f'), note, significance
    else:
        return None, note, 0
def diff(bms, loops, regex, track, old, new, counters):
    """Diff benchmark results between the 'old' and 'new' builds.

    Args:
        bms: iterable of benchmark binary names.
        loops: number of measurement loops that were run per benchmark.
        regex: --benchmark_filter regex restricting which tests are listed.
        track: fields (metric names) to track/report.
        old: label of the baseline build (also the directory suffix of the
            'bm_diff_<old>/opt/<bm>' binaries queried for the test list).
        new: label of the candidate build.
        counters: when falsy, the '.counters.' JSON files are not read and
            None is passed to bm_json.expand_json in their place.

    Returns:
        (table, note): tabulate-formatted table of differences (or None),
        and a note about corrupt/missing files (or None).
    """
    benchmarks = collections.defaultdict(Benchmark)
    # _read_json records failures into these dicts as a side effect:
    # unparseable files land in badjson_files, absent ones in nonexistant_files.
    badjson_files = {}
    nonexistant_files = {}
    for bm in bms:
        for loop in range(0, loops):
            # Ask the old binary for its (filtered) test list; each name
            # becomes part of a result-file name after sanitizing
            # path-hostile characters.
            for line in subprocess.check_output([
                    'bm_diff_%s/opt/%s' % (old, bm),
                    '--benchmark_list_tests',
                    '--benchmark_filter=%s' % regex
            ]).splitlines():
                # Fix: check_output returns bytes under Python 3, so the
                # str-argument .replace calls below would raise TypeError.
                # Decode first (a no-op semantically for ASCII test names;
                # matches the sibling version of this function).
                line = line.decode('UTF-8')
                stripped_line = line.strip().replace("/", "_").replace(
                    "<", "_").replace(">", "_").replace(", ", "_")
                js_new_opt = _read_json(
                    '%s.%s.opt.%s.%d.json' % (bm, stripped_line, new, loop),
                    badjson_files, nonexistant_files)
                js_old_opt = _read_json(
                    '%s.%s.opt.%s.%d.json' % (bm, stripped_line, old, loop),
                    badjson_files, nonexistant_files)
                if counters:
                    js_new_ctr = _read_json(
                        '%s.%s.counters.%s.%d.json' %
                        (bm, stripped_line, new, loop), badjson_files,
                        nonexistant_files)
                    js_old_ctr = _read_json(
                        '%s.%s.counters.%s.%d.json' %
                        (bm, stripped_line, old, loop), badjson_files,
                        nonexistant_files)
                else:
                    js_new_ctr = None
                    js_old_ctr = None
                for row in bm_json.expand_json(js_new_ctr, js_new_opt):
                    name = row['cpp_name']
                    # Skip aggregate rows; only raw samples are collected.
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(track, row, True)
                for row in bm_json.expand_json(js_old_ctr, js_old_opt):
                    name = row['cpp_name']
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(track, row, False)
    # Ask each Benchmark which tracked fields changed interestingly.
    really_interesting = set()
    for name, bm in benchmarks.items():
        _maybe_print(name)
        really_interesting.update(bm.process(track, new, old))
    # Preserve the caller-specified ordering of `track` in the output columns.
    fields = [f for f in track if f in really_interesting]
    headers = ['Benchmark'] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    note = None
    if len(badjson_files):
        note = 'Corrupt JSON data (indicates timeout or crash): \n%s' % fmt_dict(
            badjson_files)
    if len(nonexistant_files):
        if note:
            note += '\n\nMissing files (indicates new benchmark): \n%s' % fmt_dict(
                nonexistant_files)
        else:
            # Fix: dropped the stray leading '\n\n' — this branch is taken
            # only when this is the first (and only) note.
            note = 'Missing files (indicates new benchmark): \n%s' % fmt_dict(
                nonexistant_files)
    if rows:
        return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note
    else:
        return None, note