def startup_times(samples):
    """
    Time repeated runs of ``$SAGE_ROOT/sage -c ''``.

    Runs the command ``samples`` times through ``do_or_die`` and returns a
    list holding the wall-clock duration of each run (seconds, measured
    with ``time.time()``), in the order the runs were made.

    A ``samples`` value of zero or less runs nothing and returns ``[]``.
    """
    # Named ``timings`` rather than ``all`` so the builtin is not shadowed.
    timings = []
    for _ in range(samples):
        start = time.time()
        do_or_die("$SAGE_ROOT/sage -c ''")
        timings.append(time.time() - start)
    return timings
def startup_modules(ticket, sage_binary, baseline=None, **kwds):
    """
    Report which modules are imported when Sage starts.

    ``sage_binary`` is asked to print the sorted keys of ``sys.modules``;
    the resulting names are compared with ``baseline`` (the module list of
    a previous run) when one is given.

    The returned ``PluginResult`` is ``Failed`` if the current run imports
    a module that is not in ``baseline``, and ``Passed`` otherwise
    (including when there is no baseline).  Its ``baseline`` is the current
    module list and its ``data`` holds the ``'new'`` and ``'removed'``
    module names (empty dict when there is no baseline).
    """
    # Sometimes the first run does something different...
    do_or_die("time $SAGE_ROOT/sage -c ''")

    # Print out all the modules imported at startup.
    # * ``print(...)`` with parentheses is accepted by the interpreter behind
    #   ``sage_binary`` whether it is Python 2 or Python 3.
    # * ``universal_newlines=True`` makes ``check_output`` return text
    #   instead of bytes on Python 3, so the split below works on both.
    listing = subprocess.check_output(
        [sage_binary, "-c", r"print('\n'.join(sorted(sys.modules.keys())))"],
        universal_newlines=True)
    # The output ends with a newline; drop the resulting empty entry so it
    # is neither counted nor stored as a module name.
    modules = [name for name in listing.split('\n') if name]

    print("")
    print("Total count: %s" % len(modules))
    print("")

    if baseline is None:
        status = PluginResult.Passed
        data = {}
    else:
        module_set = set(modules)
        # Ignore empty names that an older baseline may still contain.
        baseline_set = set(name for name in baseline if name)
        new = sorted(module_set - baseline_set)
        removed = sorted(baseline_set - module_set)

        if new:
            status = PluginResult.Failed
            print("New:")
            print(" " + "\n ".join(new))
        else:
            status = PluginResult.Passed

        if removed:
            print("Removed:")
            print(" " + "\n ".join(removed))

        data = {'new': new, 'removed': removed}

    if baseline:
        # Separate the diff above from the full listing below.
        print("")
        print("=" * 20)
        print("")
    print('\n'.join(modules))
    return PluginResult(status, baseline=modules, data=data)
def docbuild(ticket, **kwds):
    """
    Build the documentation with ``make doc`` and scan the build log.

    If ``logs/dochtml.log`` exists after the build, it is searched with
    ``grep`` for warning and error markers; the matching lines are written
    to standard output by ``grep`` itself.

    Raises ``ValueError`` if any marker is found, or if ``grep`` itself
    fails (any exit status other than 1).
    """
    do_or_die('make doc')
    doc_log = 'logs/dochtml.log'
    if os.path.exists(doc_log):
        # ``-E`` is required: the pattern uses ``|`` for alternation, which
        # grep's default (basic) syntax treats as a literal character.
        r = subprocess.call(
            ['grep', '-E',
             'WARNING|SEVERE|ERROR|make.*Error|Exception occurred|Sphinx error|Segmentation fault',
             doc_log])
        if r != 1:
            # grep returns 1 iff there were no matches
            if r == 0:
                raise ValueError("problems reported in %s" % doc_log)
            raise ValueError("grep exited with status %s while scanning %s"
                             % (r, doc_log))
def startup_times(samples):
    """
    Time ``samples`` runs of ``<sage_binary> -c ''``.

    One extra run is made first and not timed.  The return value is a list
    of per-run durations in seconds (differences of ``time.time()``), in
    the order the runs were made.

    ``sage_binary`` is not a parameter; it is looked up in the enclosing
    scope.
    """
    # Untimed run before measuring starts.
    do_or_die(sage_binary + " -c ''")

    elapsed = []
    for _ in range(samples):
        t0 = time.time()
        do_or_die(sage_binary + " -c ''")
        t1 = time.time()
        elapsed.append(t1 - t0)
    return elapsed
def docbuild_pdf(ticket, make, **kwds):
    """
    Build the PDF documentation by running ``<make> doc-pdf``.

    A very complete LaTeX installation is required; missing LaTeX
    packages may cause false failures.

    STILL EXPERIMENTAL!
    """
    command = '{} doc-pdf'.format(make)
    do_or_die(command)
def git_rev_list(ticket, **kwds):
    """
    Show how the ticket branch differs from the base branch.

    Prints the number of commits found only in ``patchbot/ticket_upstream``
    and only in ``patchbot/base``, then runs ``git diff --stat``,
    ``git log --oneline`` and ``git log`` from the point returned by
    ``describe_branch`` up to ``patchbot/ticket_upstream``.

    Nothing is done for the ticket whose id is ``0``.
    """
    if str(ticket['id']) == '0':
        return

    def count_commits(rev_range):
        # ``git rev-list --count`` prints a single integer.
        return int(subprocess.check_output(
            ["git", "rev-list", "--count", rev_range]))

    base_only = count_commits("patchbot/ticket_upstream..patchbot/base")
    ticket_only = count_commits("patchbot/base..patchbot/ticket_upstream")
    print("only in ticket ({})".format(ticket_only))
    print("only in base ({})".format(base_only))

    base = describe_branch('patchbot/ticket_upstream', tag_only=True)
    rev_range = "%s..patchbot/ticket_upstream" % base
    for git_command in ("git diff --stat", "git log --oneline", "git log"):
        do_or_die(git_command + " " + rev_range)
def git_rev_list(ticket, **kwds):
    """
    Show how the ticket branch differs from the base branch.

    Prints the number of commits found only in ``patchbot/ticket_upstream``
    and only in ``patchbot/base``, then runs ``git diff --stat``,
    ``git log --oneline`` and ``git log`` over
    ``patchbot/base..patchbot/ticket_upstream``.

    Nothing is done for the ticket whose id is ``0``.
    """
    if str(ticket['id']) != '0':
        base_only = int(subprocess.check_output(
            ["git", "rev-list", "--count",
             "patchbot/ticket_upstream..patchbot/base"]))
        ticket_only = int(subprocess.check_output(
            ["git", "rev-list", "--count",
             "patchbot/base..patchbot/ticket_upstream"]))
        # Single-argument ``print(...)`` produces the same output under
        # Python 2 and Python 3; ``print("")`` emits one blank line on both.
        print("only in ticket (%s)" % ticket_only)
        print("only in base (%s)" % base_only)
        print("")
        do_or_die("git diff --stat patchbot/base..patchbot/ticket_upstream")
        print("")
        do_or_die("git log --oneline patchbot/base..patchbot/ticket_upstream")
        print("")
        print("")
        do_or_die("git log patchbot/base..patchbot/ticket_upstream")
def startup_time(ticket, original_dir, patched_dir, loops=5, total_samples=30, dry_run=False, **kwds):
    """
    Try to decide if the startup time is getting worse.

    Startup of ``$SAGE_ROOT/sage -c ''`` is timed alternately in
    ``patched_dir`` (with ``patchbot/ticket_merged`` checked out and built)
    and in ``original_dir`` (with ``patchbot/base`` checked out and built),
    ``loops`` times each.  The two sets of timings are then compared with
    ``mann_whitney_U`` at a series of relative offsets.

    With ``dry_run`` the number of loops is halved and the number of
    samples divided by five.

    The returned ``PluginResult`` is ``Failed`` when the mean startup time
    went up and some interval has confidence >= 0.9 with a lower bound
    >= 0.001; it is always ``Passed`` for the ticket whose id is ``0``.
    Its ``data`` holds the confidence intervals (lower bounds negated when
    the time decreased), both timing lists, ``loops`` and
    ``total_samples``.

    Whatever happens, the function finishes in ``patched_dir`` with the
    ticket branch checked out and built again.
    """
    if dry_run:
        # Keep at least one loop, otherwise no timings would be collected.
        loops = max(1, loops // 2)
        total_samples //= 5

    # Single-argument ``print(...)`` behaves identically on Python 2 and 3.
    print("%s samples in %s loops" % (total_samples, loops))
    ticket_id = ticket['id']
    choose_base = "git checkout patchbot/base; make build > /dev/null"
    choose_ticket = "git checkout patchbot/ticket_merged; make build > /dev/null"

    try:
        def startup_times(samples):
            # One untimed run first, then ``samples`` timed runs.
            do_or_die("$SAGE_ROOT/sage -c ''")
            timings = []
            for _ in range(samples):
                start = time.time()
                do_or_die("$SAGE_ROOT/sage -c ''")
                timings.append(time.time() - start)
            return timings

        main_timings = []
        ticket_timings = []

        # Build and start both versions once before measuring anything.
        os.chdir(patched_dir)
        do_or_die(choose_ticket)
        do_or_die("$SAGE_ROOT/sage -c ''")
        os.chdir(original_dir)
        do_or_die(choose_base)
        do_or_die("$SAGE_ROOT/sage -c ''")

        for k in range(loops):
            # The sample count grows by two per loop; both versions get
            # the same number of samples within a loop.
            samples = total_samples // loops + 2 * k - loops + 1
            os.chdir(patched_dir)
            do_or_die(choose_ticket)
            ticket_timings.extend(startup_times(samples))
            os.chdir(original_dir)
            do_or_die(choose_base)
            main_timings.extend(startup_times(samples))

        print("main_timings = %s" % (main_timings,))
        print("ticket_timings = %s" % (ticket_timings,))

        n1 = len(main_timings)
        p1 = mean(main_timings)
        s1 = std_dev(main_timings)

        n2 = len(ticket_timings)
        p2 = mean(ticket_timings)
        s2 = std_dev(ticket_timings)

        base = p1
        diff = abs(p2 - p1)
        increased = p1 < p2
        # Indexed by the boolean ``increased``.
        inc_or_dec = ['decreased', 'increased']

        print("")
        print("Main: %0.5g sec (%s samples, std_dev=%0.3g)" % (p1, n1, s1))
        print("Ticket: %0.5g sec (%s samples, std_dev=%0.3g)" % (p2, n2, s2))
        print("")
        # ``[:-1]`` turns "increased"/"decreased" into "increase"/"decrease".
        print("Average %s of %0.2g secs or %0.2g%%." % (
            inc_or_dec[increased][:-1], diff, 100 * diff / base))
        print("")
        print("Using the Mann-Whitney U test to determine significance.")

        # Try successively smaller relative offsets and keep those whose
        # confidence exceeds 0.25, stopping once five have been collected.
        confidence_intervals = []
        for lower_bound in (1, .5, .25, .1, .05, .025, .01, 0.005, .0025, .001):
            z = mann_whitney_U(main_timings, ticket_timings, offset=base * lower_bound)
            confidence = CDF(z)
            if confidence > 0.25:
                confidence_intervals.append((confidence, lower_bound))
            if len(confidence_intervals) >= 5:
                break

        status = PluginResult.Passed
        if not confidence_intervals:
            print("No statistically significant difference.")
        for confidence, lower_bound in confidence_intervals:
            if increased and confidence >= .9 and lower_bound >= .001:
                status = PluginResult.Failed
            # Print 99.999x%.
            confidence = 1 - float(("%0.1g" if confidence > .9 else "%0.2g") % (1 - confidence))
            print("With %g%% confidence, startup time %s by at least %0.2g%%" % (
                100 * confidence, inc_or_dec[increased], 100 * lower_bound))

        if not increased:
            # Report decreases as negative lower bounds.
            confidence_intervals = [(x, -y) for x, y in confidence_intervals]
        data = dict(confidence_intervals=confidence_intervals,
                    main_timings=main_timings,
                    ticket_timings=ticket_timings,
                    loops=loops,
                    total_samples=total_samples)
        if str(ticket_id) == '0':
            status = PluginResult.Passed
        return PluginResult(status, data=data)

    finally:
        print("")
        os.chdir(patched_dir)
        do_or_die(choose_ticket)
def startup_time(ticket, loops=5, total_samples=30, **kwds):
    """
    Try to decide if the startup time is getting worse.

    Startup of ``$SAGE_ROOT/sage -c ''`` is timed alternately after
    ``sage -b <ticket id>`` and after ``sage -b 0``, ``loops`` times each.
    The two sets of timings are then compared with ``mann_whitney_U`` at a
    series of relative offsets.

    The returned ``PluginResult`` is ``Failed`` when the mean startup time
    went up and some interval has confidence >= 0.9 with a lower bound
    >= 0.001, and ``Passed`` otherwise.  Its ``data`` holds the confidence
    intervals (lower bounds negated when the time decreased), both timing
    lists, ``loops`` and ``total_samples``.

    Whatever happens, ``sage -b <ticket id>`` is run again at the end.
    """
    ticket_id = ticket["id"]

    try:
        def startup_times(samples):
            timings = []
            for _ in range(samples):
                start = time.time()
                do_or_die("$SAGE_ROOT/sage -c ''")
                timings.append(time.time() - start)
            return timings

        main_timings = []
        ticket_timings = []

        # Build and start both versions once before measuring anything.
        do_or_die("$SAGE_ROOT/sage -b %s > /dev/null; sage -c ''" % ticket_id)
        do_or_die("$SAGE_ROOT/sage -b 0 > /dev/null; sage -c ''")

        for k in range(loops):
            # The sample count grows by two per loop; both versions get
            # the same number of samples within a loop.
            samples = total_samples // loops + 2 * k - loops + 1
            do_or_die("$SAGE_ROOT/sage -b %s > /dev/null" % ticket_id)
            ticket_timings.extend(startup_times(samples))
            do_or_die("$SAGE_ROOT/sage -b 0 > /dev/null")
            main_timings.extend(startup_times(samples))

        # Single-argument ``print(...)`` behaves identically on Python 2 and 3.
        print("main_timings = %s" % (main_timings,))
        print("ticket_timings = %s" % (ticket_timings,))

        n1 = len(main_timings)
        p1 = mean(main_timings)
        s1 = std_dev(main_timings)

        n2 = len(ticket_timings)
        p2 = mean(ticket_timings)
        s2 = std_dev(ticket_timings)

        base = p1
        diff = abs(p2 - p1)
        increased = p1 < p2
        # Indexed by the boolean ``increased``.
        inc_or_dec = ["decreased", "increased"]

        print("")
        print("Main: %0.5g sec (%s samples, std_dev=%0.3g)" % (p1, n1, s1))
        print("Ticket: %0.5g sec (%s samples, std_dev=%0.3g)" % (p2, n2, s2))
        print("")
        # ``[:-1]`` turns "increased"/"decreased" into "increase"/"decrease".
        print("Average %s of %0.2g secs or %0.2g%%." % (
            inc_or_dec[increased][:-1], diff, 100 * diff / base))
        print("")
        print("Using the Mann-Whitney U test to determine significance.")

        # Try successively smaller relative offsets and keep those whose
        # confidence exceeds 0.25, stopping once five have been collected.
        confidence_intervals = []
        for lower_bound in (1, 0.5, 0.25, 0.1, 0.05, 0.025, 0.01, 0.005, 0.0025, 0.001):
            z = mann_whitney_U(main_timings, ticket_timings, offset=base * lower_bound)
            confidence = CDF(z)
            if confidence > 0.25:
                confidence_intervals.append((confidence, lower_bound))
            if len(confidence_intervals) >= 5:
                break

        status = PluginResult.Passed
        if not confidence_intervals:
            print("No statistically significant difference.")
        for confidence, lower_bound in confidence_intervals:
            if increased and confidence >= 0.9 and lower_bound >= 0.001:
                status = PluginResult.Failed
            # Print 99.999x%.
            confidence = 1 - float(("%0.1g" if confidence > 0.9 else "%0.2g") % (1 - confidence))
            print("With %g%% confidence, startup time %s by at least %0.2g%%" % (
                100 * confidence, inc_or_dec[increased], 100 * lower_bound))

        if not increased:
            # Report decreases as negative lower bounds.
            confidence_intervals = [(x, -y) for x, y in confidence_intervals]
        data = dict(
            confidence_intervals=confidence_intervals,
            main_timings=main_timings,
            ticket_timings=ticket_timings,
            loops=loops,
            total_samples=total_samples,
        )
        return PluginResult(status, data=data)

    finally:
        print("")
        do_or_die("$SAGE_ROOT/sage -b %s > /dev/null" % ticket_id)
def docbuild(ticket, **kwds):
    """
    Build the HTML reference manual via ``sage -docbuild``.
    """
    command = "$SAGE_ROOT/sage -docbuild --jsmath reference html"
    do_or_die(command)
def coverage(ticket, **kwds):
    """
    Run ``sage -coverageall``.
    """
    command = '$SAGE_ROOT/sage -coverageall'
    do_or_die(command)
def startup_time(ticket, make, sage_binary, loops=5, total_samples=50, dry_run=False, **kwds):
    """
    Try to decide if the startup time is getting worse.

    ``patchbot/base`` is checked out and built with ``make``, and startup
    of ``<sage_binary> -c ''`` is timed in ``loops`` batches.  The same is
    then done for ``patchbot/ticket_merged``.  The two sets of timings are
    compared with ``mann_whitney_U`` at a series of relative offsets.

    With ``dry_run`` the number of loops is halved and the number of
    samples divided by five.

    The returned ``PluginResult`` is ``Failed`` when the mean startup time
    went up and some interval has confidence >= 0.95 with a lower bound
    >= 0.001; it is always ``Passed`` for the ticket whose id is ``0``.
    Its ``data`` holds the confidence intervals (lower bounds negated when
    the time decreased), both timing lists, ``loops`` and
    ``total_samples``.

    Whatever happens, the ticket branch is checked out and built again at
    the end.
    """
    if dry_run:
        # Keep at least one loop, otherwise no timings would be collected.
        loops = max(1, loops // 2)
        total_samples //= 5

    print("{} samples in {} loops".format(total_samples, loops))
    ticket_id = ticket['id']
    choose_base = "git checkout patchbot/base; {} build > /dev/null".format(make)
    choose_ticket = "git checkout patchbot/ticket_merged; {} build > /dev/null".format(make)

    def startup_times(samples):
        # One untimed run first, then ``samples`` timed runs.
        do_or_die(sage_binary + " -c ''")
        all_times = []
        for _ in range(samples):
            start = time.time()
            do_or_die(sage_binary + " -c ''")
            all_times.append(time.time() - start)
        return all_times

    # Samples per batch; the count grows by two from one batch to the next.
    batch_sizes = [total_samples // loops + 2 * k - loops + 1
                   for k in range(loops)]

    try:
        main_timings = []
        do_or_die(choose_base)
        for samples in batch_sizes:
            main_timings.extend(startup_times(samples))

        ticket_timings = []
        do_or_die(choose_ticket)
        for samples in batch_sizes:
            ticket_timings.extend(startup_times(samples))

        print("main_timings = {}".format(main_timings))
        print("ticket_timings = {}".format(ticket_timings))

        n1 = len(main_timings)
        p1 = mean(main_timings)
        s1 = std_dev(main_timings)

        n2 = len(ticket_timings)
        p2 = mean(ticket_timings)
        s2 = std_dev(ticket_timings)

        base = p1
        diff = abs(p2 - p1)
        increased = p1 < p2
        # Indexed by the boolean ``increased``.
        inc_or_dec = ['decreased', 'increased']

        print("Main: %0.5g sec (%s samples, std_dev=%0.3g)" % (p1, n1, s1))
        print("Ticket: %0.5g sec (%s samples, std_dev=%0.3g)" % (p2, n2, s2))
        # ``[:-1]`` turns "increased"/"decreased" into "increase"/"decrease".
        print("Average %s of %0.2g secs or %0.2g%%."
              % (inc_or_dec[increased][:-1], diff, 100 * diff / base))
        print("Using the Mann-Whitney U test to determine significance.")

        # Try successively smaller relative offsets and keep those whose
        # confidence exceeds 0.25, stopping once five have been collected.
        confidence_intervals = []
        for lower_bound in (1, .5, .25, .1, .05, .025, .01, 0.005, .0025, .001):
            z = mann_whitney_U(main_timings, ticket_timings,
                               offset=base * lower_bound)
            confidence = CDF(z)
            if confidence > 0.25:
                confidence_intervals.append((confidence, lower_bound))
            if len(confidence_intervals) >= 5:
                break

        status = PluginResult.Passed
        if not confidence_intervals:
            print("No statistically significant difference.")
        elif increased:
            # Only announce a slowdown when the mean actually went up; the
            # per-interval lines below already describe a decrease.
            print("May have caused a slowdown.")
        for confidence, lower_bound in confidence_intervals:
            if increased and confidence >= .95 and lower_bound >= .001:
                status = PluginResult.Failed
            # Round the displayed confidence towards the form 99.999x%.
            confidence = 1 - float(("%0.1g" if confidence > .9 else "%0.2g") % (1 - confidence))
            print("With %g%% confidence, startup time %s by at least %0.2g%%" % (
                100 * confidence, inc_or_dec[increased], 100 * lower_bound))

        if not increased:
            # Report decreases as negative lower bounds.
            confidence_intervals = [(x, -y) for x, y in confidence_intervals]
        data = dict(confidence_intervals=confidence_intervals,
                    main_timings=main_timings,
                    ticket_timings=ticket_timings,
                    loops=loops,
                    total_samples=total_samples)
        if str(ticket_id) == '0':
            # Never fail the initial startup.
            status = PluginResult.Passed
        return PluginResult(status, data=data)

    finally:
        do_or_die(choose_ticket)
def docbuild(ticket, make, **kwds):
    """
    Build the documentation by running ``<make> doc``.
    """
    command = '{} doc'.format(make)
    do_or_die(command)