def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, preparation_cmd, run_option='end_of_day',
             start_date=None, overwrite=False):
    """Set up the benchmark runner.

    Records configuration, opens the source git repository and the
    results database, prepares a scratch copy of the repo for builds,
    and registers the given benchmarks.
    """
    self.benchmarks = benchmarks
    self.checksums = [bmk.checksum for bmk in benchmarks]

    self.start_date = start_date
    self.run_option = run_option

    # Source repository and results database.
    self.repo_path = repo_path
    self.repo = GitRepo(self.repo_path)
    self.db_path = db_path
    self.db = BenchmarkDB(db_path)

    # Scratch location the repo is copied to for building/running.
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                preparation_cmd)

    self._register_benchmarks()
def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, prep_cmd, clean_cmd=None, run_option='eod',
             run_order='normal', start_date=None, overwrite=False,
             module_dependencies=None, always_clean=False,
             use_blacklist=True, verify=False):
    """Set up the benchmark runner.

    Optionally verifies the benchmark list, opens the source git
    repository and the results database, and prepares a scratch copy
    of the repo for building/running.
    """
    log.info("Initializing benchmark runner for %d benchmarks"
             % (len(benchmarks)))

    # Cleared here; presumably filled when `self.benchmarks` is
    # assigned at the end (looks like a property setter — confirm).
    self._benchmarks = None
    self._checksums = None

    if verify:
        verify_benchmarks(benchmarks, raise_=True)

    self.start_date = start_date
    self.run_option = run_option
    self.run_order = run_order

    # Source repository and results database.
    self.repo_path = repo_path
    self.repo = GitRepo(self.repo_path)
    self.db_path = db_path
    self.db = BenchmarkDB(db_path)
    self.use_blacklist = use_blacklist

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                prep_cmd, clean_cmd,
                                always_clean=always_clean,
                                dependencies=module_dependencies)

    self.benchmarks = benchmarks
def __init__(self, benchmarks, repo_path, repo_url, build_cmd, db_path,
             tmp_dir, preparation_cmd, run_option='eod', start_date=None,
             overwrite=False, module_dependencies=None, always_clean=False,
             use_blacklist=True, time=True):
    """Set up the benchmark runner.

    Opens the source git repository and results database, loads the
    revision blacklist, prepares a scratch copy of the repo for
    building/running, and registers the given benchmarks.
    """
    self.benchmarks = benchmarks
    self.checksums = [bmk.checksum for bmk in benchmarks]

    self.start_date = start_date
    self.run_option = run_option

    # Source repository and results database.
    self.repo_path = repo_path
    self.repo = GitRepo(self.repo_path)
    self.db_path = db_path
    self.db = BenchmarkDB(db_path)

    # Revisions known to be unusable are kept in a set for fast lookup.
    self.use_blacklist = use_blacklist
    self.blacklist = set(self.db.get_rev_blacklist())
    self.time = time

    # where to copy the repo
    self.tmp_dir = tmp_dir
    self.bench_repo = BenchRepo(repo_url, self.tmp_dir, build_cmd,
                                preparation_cmd,
                                always_clean=always_clean,
                                dependencies=module_dependencies)

    self._register_benchmarks()

    # Interpreter used to run benchmarks; overridable via environment.
    self._python = os.environ.get('VBENCH_PYTHON', 'python')
import subprocess
import os
import re
import sys

import numpy as np

from pandas import *

if __name__ == '__main__':
    from vbench.git import GitRepo

    # Hard-coded local checkout of pandas.
    repo = GitRepo('/Users/wesm/code/pandas')
    churn = repo.get_churn_by_file()

    # Keep only Python / Cython source files.
    file_include = [path for path in churn.major_axis
                    if path.endswith(('.pyx', '.py'))]

    # Skip commits whose message contains 'LF', plus one specific sha.
    commits_include = [sha for sha in churn.minor_axis
                       if 'LF' not in repo.messages[sha]]
    commits_include.remove('dcf3490')

    clean_churn = churn.reindex(major=file_include, minor=commits_include)

    # Total churn per commit, then aggregate per commit date.
    # NOTE: `datetime` comes from the pandas star import above.
    by_commit = clean_churn.sum('major').sum(1)
    by_date = by_commit.groupby(repo.commit_date).sum()
    by_date = by_date.drop([datetime(2011, 6, 10)])
from pandas import *
import matplotlib.pyplot as plt
import sqlite3

from vbench.git import GitRepo

REPO_PATH = '/home/adam/code/pandas'
repo = GitRepo(REPO_PATH)

con = sqlite3.connect('vb_suite/benchmarks.db')

# Benchmark checksums used during interactive exploration; only the
# last assignment to `bmk` is effective.
bmk = '36900a889961162138c140ce4ae3c205'
# bmk = '9d7b8c04b532df6c2d55ef497039b0ce'
bmk = '4481aa4efa9926683002a673d2ed3dac'
bmk = '00593cd8c03d769669d7b46585161726'
bmk = '3725ab7cd0a0657d7ae70f171c877cea'
bmk = '3cd376d6d6ef802cdea49ac47a67be21'
bmk2 = '459225186023853494bc345fd180f395'
bmk = 'c22ca82e0cfba8dc42595103113c7da3'
bmk = 'e0e651a8e9fbf0270ab68137f8b9df5f'
bmk = '96bda4b9a60e17acf92a243580f2a0c3'


def get_results(bmk):
    """Return a timestamp-indexed Series of timings for checksum *bmk*.

    Looks up all rows for the benchmark in the `results` table, maps
    each revision to its commit timestamp via the repo, and returns the
    series sorted by time.
    """
    # FIX: use a parameterized query instead of %-interpolating the
    # checksum into the SQL string (avoids quoting bugs / injection).
    results = con.execute(
        "select * from results where checksum=?", (bmk,)).fetchall()
    # Column 1 is assumed to be the revision sha, column 3 the timing —
    # TODO confirm against the results table schema.
    x = Series(dict((t[1], t[3]) for t in results))
    x.index = x.index.map(repo.timestamps.get)
    x = x.sort_index()
    return x
# NOTE(review): the expression below is the tail of a call whose opening
# line (and enclosing def) lies outside this view; it is kept
# byte-identical.  Presumably it builds a pandas Panel of per-file
# insertions/deletions keyed by commit sha — confirm upstream.
        {
            'insertions': DataFrame(insertions),
            'deletions': DataFrame(deletions)
        },
        minor_axis=shas)

# return DataFrame({'insertions' : insertions,
#                    'deletions' : deletions}, index=shas)


if __name__ == '__main__':
    # commits, hists = get_commit_history()
    # churn = get_code_churn(commits)

    from vbench.git import GitRepo

    # Hard-coded local checkout of pandas.
    repo = GitRepo('/Users/wesm/code/pandas')
    churn = repo.get_churn_by_file()

    # Keep only Python / Cython source files.
    file_include = []
    for path in churn.major_axis:
        if path.endswith('.pyx') or path.endswith('.py'):
            file_include.append(path)

    # Skip commits whose message contains 'LF', plus one specific sha.
    commits_include = [
        sha for sha in churn.minor_axis
        if 'LF' not in repo.messages[sha]
    ]
    commits_include.remove('dcf3490')

    clean_churn = churn.reindex(major=file_include, minor=commits_include)

    # Total churn per commit: sum across files, then across
    # insertions/deletions.
    by_commit = clean_churn.sum('major').sum(1)
def main():
    """Benchmark the current HEAD against BASELINE_COMMIT and report.

    Runs the benchmark suite for both commits, computes the
    head/baseline timing ratio per benchmark, and writes a sorted
    report both to LOG_FILE and to stdout.  The temporary build
    directory is always removed on exit.
    """
    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % LOG_FILE)

    try:
        # FIX: `with` guarantees the logfile is closed exactly once.
        # The previous code called logfile.close() both in the body and
        # in `finally`, and the `finally` raised NameError (masking the
        # real error) whenever open() itself failed.
        with open(LOG_FILE, 'w') as logfile:
            prprint("Processing Repo at '%s'..." % REPO_PATH)
            repo = GitRepo(REPO_PATH)

            # get hashes of baseline and current head
            h_head = repo.shas[-1]
            h_baseline = BASELINE_COMMIT

            prprint("Opening DB at '%s'...\n" % DB_PATH)
            db = BenchmarkDB(DB_PATH)

            prprint('Comparing Head [%s] : %s ' %
                    (h_head, repo.messages.get(h_head, "")))
            prprint('Against baseline [%s] : %s \n' %
                    (h_baseline, repo.messages.get(h_baseline, "")))

            prprint("Initializing Runner...")
            runner = BenchmarkRunner(
                benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, TMP_DIR,
                PREPARE, always_clean=True,
                # run_option='eod',
                start_date=START_DATE,
                module_dependencies=dependencies)

            prprint("removing any previous measurements for the commits.")
            db.delete_rev_results(h_baseline)
            db.delete_rev_results(h_head)

            # TODO: we could skip this, but we need to make sure all
            # results are in the DB, which is a little tricky with
            # start dates and so on.
            prprint("Running benchmarks for baseline commit '%s'"
                    % h_baseline)
            runner._run_and_write_results(h_baseline)

            prprint("Running benchmarks for current HEAD '%s'" % h_head)
            runner._run_and_write_results(h_head)

            prprint('Processing results...')

            head_res = get_results_df(db, h_head)
            baseline_res = get_results_df(db, h_baseline)
            ratio = head_res['timing'] / baseline_res['timing']
            totals = DataFrame(dict(t_head=head_res['timing'],
                                    t_baseline=baseline_res['timing'],
                                    ratio=ratio,
                                    name=baseline_res.name),
                               columns=["t_head", "t_baseline",
                                        "ratio", "name"])
            totals = totals.ix[totals.t_head > 0.010]  # ignore sub 10micros
            totals = totals.dropna().sort("ratio").set_index(
                'name')  # sort in ascending order

            s = "\n\nResults:\n" + totals.to_string(
                float_format=lambda x: "%0.4f" % x) + "\n\n"
            s += "Columns: test_name | head_time [ms] | baseline_time [ms] | ratio\n\n"
            # FIX: typo in the report text ("then" -> "than").
            s += "- a Ratio of 1.30 means HEAD is 30% slower than the Baseline.\n\n"
            s += 'Head [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
            s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                             repo.messages.get(h_baseline,
                                                               ""))

            logfile.write(s)

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n"
                % LOG_FILE)
    finally:
        # Always dispose of the scratch checkout.
        shutil.rmtree(TMP_DIR)