def test_remove_samples():
    benchmark1 = {'name': 'a', 'version': '1', 'params': []}
    benchmark2 = {'name': 'b', 'version': '1', 'params': [['1', '2', '3']]}

    r = results.Results.unnamed()

    v1 = runner.BenchmarkResult(result=[True], samples=[[1]], number=[1],
                                profile=None, errcode=0, stderr='')
    v2 = runner.BenchmarkResult(result=[True] * 3, samples=[[1], [2], [3]], number=[1, 1, 1],
                                profile=None, errcode=0, stderr='')

    r.add_result(benchmark1, v1, record_samples=True)
    r.add_result(benchmark2, v2, record_samples=True)

    assert r.get_result_samples(benchmark1['name'], benchmark1['params']) == v1.samples
    assert r.get_result_samples(benchmark2['name'], benchmark2['params']) == v2.samples

    r.remove_samples(benchmark1['name'])
    assert r.get_result_samples(benchmark1['name'], benchmark1['params']) == [None]

    r.remove_samples(benchmark2['name'], selected_idx=[1])
    assert r.get_result_samples(benchmark2['name'], benchmark2['params']) == [[1], None, [3]]

    r.remove_samples(benchmark2['name'])
    assert r.get_result_samples(benchmark2['name'], benchmark2['params']) == [None, None, None]


def test_json_timestamp(tmpdir):
    # Check that per-benchmark timestamps are saved as JS timestamps in the result file
    tmpdir = six.text_type(tmpdir)

    stamp0 = datetime.datetime(1970, 1, 1)
    stamp1 = datetime.datetime(1971, 1, 1)
    duration = 1.5

    r = results.Results({'machine': 'mach'},
                        {},
                        'aaaa',
                        util.datetime_to_timestamp(stamp0),
                        'py',
                        'env',
                        {})
    value = runner.BenchmarkResult(result=[42], samples=[None], number=[None],
                                   profile=None, errcode=0, stderr='')
    benchmark = {'name': 'some_benchmark', 'version': 'some version', 'params': []}
    r.add_result(benchmark, value, started_at=stamp1, duration=duration)
    r.save(tmpdir)

    r = util.load_json(join(tmpdir, 'mach', 'aaaa-env.json'))
    keys = r['result_columns']
    values = dict(zip(keys, r['results']['some_benchmark']))
    assert values['started_at'] == util.datetime_to_js_timestamp(stamp1)
    assert values['duration'] == duration


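# Illustrative only: a minimal sketch of the JS-timestamp convention the
# assertions above rely on, assumed here to be milliseconds since the Unix
# epoch. This helper is hypothetical and is not asv's
# util.datetime_to_js_timestamp implementation.
def _js_timestamp_sketch(dt):
    epoch = datetime.datetime(1970, 1, 1)
    return int((dt - epoch).total_seconds() * 1000)

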
def test_skip_param_selection():
    d = {'repo': 'foo'}
    d.update(ASV_CONF_JSON)
    conf = config.Config.from_json(d)

    class DummyEnv(object):
        name = 'env'

    d = [{'name': 'test_nonparam', 'params': [], 'version': '1'},
         {'name': 'test_param',
          'params': [['1', '2', '3']],
          'param_names': ['n'],
          'version': '1'}]

    results = Results.unnamed()
    b = benchmarks.Benchmarks(conf, d, [r'test_nonparam', r'test_param\([23]\)'])

    results.add_result(b['test_param'],
                       runner.BenchmarkResult(result=[1, 2, 3],
                                              samples=[None] * 3,
                                              number=[None] * 3,
                                              errcode=0, stderr='', profile=None))

    runner.skip_benchmarks(b, DummyEnv(), results)

    assert results._results.get('test_nonparam') is None
    assert results._results['test_param'] == [1, None, None]


def generate_result_dir(tmpdir, dvcs, values, branches=None):
    result_dir = join(tmpdir, "results")
    os.makedirs(result_dir)
    html_dir = join(tmpdir, "html")
    machine_dir = join(result_dir, "tarzan")
    os.makedirs(machine_dir)

    if branches is None:
        branches = [None]

    conf = config.Config.from_json({
        'results_dir': result_dir,
        'html_dir': html_dir,
        'repo': dvcs.path,
        'project': 'asv',
        'branches': branches or [None],
    })
    repo = get_repo(conf)

    util.write_json(join(machine_dir, "machine.json"), {
        'machine': 'tarzan',
        'version': 1,
    })

    timestamp = datetime.datetime.utcnow()
    benchmark_version = sha256(os.urandom(16)).hexdigest()

    params = []
    param_names = None
    for commit, value in values.items():
        if isinstance(value, dict):
            params = value["params"]
            value = value["result"]
        else:
            value = [value]
        result = Results({"machine": "tarzan"}, {}, commit,
                         repo.get_date_from_name(commit), "2.7", None, {})
        value = runner.BenchmarkResult(result=value,
                                       samples=[None] * len(value),
                                       number=[None] * len(value),
                                       errcode=0, stderr='', profile=None)
        result.add_result({"name": "time_func",
                           "version": benchmark_version,
                           "params": params},
                          value,
                          started_at=timestamp,
                          duration=1.0)
        result.save(result_dir)

    if params:
        param_names = ["param{}".format(k) for k in range(len(params))]

    util.write_json(join(result_dir, "benchmarks.json"), {
        "time_func": {
            "name": "time_func",
            "params": params or [],
            "param_names": param_names or [],
            "version": benchmark_version,
        }
    }, api_version=2)
    return conf


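# Illustrative only: a hypothetical caller of generate_result_dir showing the
# two accepted value forms (a plain scalar, or a dict with "params"/"result"
# for a parameterized benchmark). The commit labels and the `dvcs` fixture are
# assumptions for illustration; real tests pass in a repository helper whose
# commit names repo.get_date_from_name() understands.
def _generate_result_dir_usage_sketch(tmpdir, dvcs):
    conf = generate_result_dir(tmpdir, dvcs, {
        "commit-a": 1.0,                                             # scalar result
        "commit-b": {"params": [["x", "y"]], "result": [2.0, 3.0]},  # parameterized result
    })
    return conf

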
def test_results(tmpdir):
    tmpdir = six.text_type(tmpdir)

    timestamp1 = datetime.datetime.utcnow()
    duration = 1.5

    resultsdir = join(tmpdir, "results")
    for i in six.moves.xrange(10):
        r = results.Results({'machine': 'foo', 'arch': 'x86_64'},
                            {},
                            hex(i),
                            i * 1000000,
                            '2.7',
                            'some-environment-name',
                            {})

        x1 = float(i * 0.001)
        x2 = float(i * 0.001)
        x3 = float((i + 1)**-1)

        values = {
            'suite1.benchmark1': {'result': [x1], 'number': [1], 'samples': [[x1, x1]],
                                  'params': [['a']], 'version': "1", 'profile': b'\x00\xff'},
            'suite1.benchmark2': {'result': [x2], 'number': [1], 'samples': [[x2, x2, x2]],
                                  'params': [], 'version': "1", 'profile': b'\x00\xff'},
            'suite2.benchmark1': {'result': [x3], 'number': [None], 'samples': [None],
                                  'params': [['c']], 'version': None, 'profile': b'\x00\xff'},
        }

        for key, val in values.items():
            v = runner.BenchmarkResult(result=val['result'],
                                       samples=val['samples'],
                                       number=val['number'],
                                       profile=val['profile'],
                                       errcode=0,
                                       stderr='')
            benchmark = {'name': key, 'version': val['version'], 'params': val['params']}
            r.add_result(benchmark, v, record_samples=True,
                         started_at=timestamp1, duration=duration)

        # Save / add_existing_results roundtrip
        r.save(resultsdir)

        r2 = results.Results.load(join(resultsdir, r._filename))
        assert r2.date == r.date
        assert r2.commit_hash == r.commit_hash
        assert r2._filename == r._filename

        r3 = results.Results(r.params,
                             r._requirements,
                             r.commit_hash,
                             r.date,
                             r._python,
                             r.env_name,
                             {})
        r3.load_data(resultsdir)

        for rr in [r2, r3]:
            assert rr._results == r._results
            assert rr._stats == _truncate_floats(r._stats)
            assert rr._samples == r._samples
            assert rr._profiles == r._profiles
            assert rr.started_at == r._started_at
            assert rr.duration == _truncate_floats(r._duration)
            assert rr.benchmark_version == r._benchmark_version

        # Check the get_* methods
        assert sorted(r2.get_all_result_keys()) == sorted(values.keys())
        for bench in r2.get_all_result_keys():
            # Get with same parameters as stored
            params = r2.get_result_params(bench)
            assert params == values[bench]['params']
            assert r2.get_result_value(bench, params) == values[bench]['result']
            assert r2.get_result_samples(bench, params) == values[bench]['samples']
            stats = r2.get_result_stats(bench, params)
            if values[bench]['number'][0] is None:
                assert stats == [None]
            else:
                assert stats[0]['number'] == values[bench]['number'][0]

            # Get with different parameters than stored (should return n/a)
            bad_params = [['foo', 'bar']]
            assert r2.get_result_value(bench, bad_params) == [None, None]
            assert r2.get_result_stats(bench, bad_params) == [None, None]
            assert r2.get_result_samples(bench, bad_params) == [None, None]

            # Get profile
            assert r2.get_profile(bench) == b'\x00\xff'

        # Check get_result_keys
        mock_benchmarks = {
            'suite1.benchmark1': {'version': '1'},
            'suite1.benchmark2': {'version': '2'},
            'suite2.benchmark1': {'version': '2'},
        }
        assert sorted(r2.get_result_keys(mock_benchmarks)) == ['suite1.benchmark1',
                                                               'suite2.benchmark1']


def test_run_benchmarks(benchmarks_fixture, tmpdir):
    conf, repo, envs, commit_hash = benchmarks_fixture

    start_timestamp = datetime.datetime.utcnow()

    b = benchmarks.Benchmarks.discover(conf, repo, envs, [commit_hash])

    # Old results to append to
    results = Results.unnamed()
    name = 'time_examples.TimeSuite.time_example_benchmark_1'
    results.add_result(b[name],
                       runner.BenchmarkResult(result=[1], samples=[[42.0, 24.0]], number=[1],
                                              errcode=0, stderr='', profile=None),
                       record_samples=True)

    # Run
    runner.run_benchmarks(b, envs[0], results=results, profile=True, show_stderr=True,
                          append_samples=True, record_samples=True)
    times = ResultsWrapper(results, b)

    end_timestamp = datetime.datetime.utcnow()

    assert len(times) == len(b)
    assert times['time_examples.TimeSuite.time_example_benchmark_1'].result != [None]
    stats = results.get_result_stats(name, b[name]['params'])
    assert isinstance(stats[0]['std'], float)
    # The exact number of samples may vary if the calibration is not fully accurate
    samples = results.get_result_samples(name, b[name]['params'])
    assert len(samples[0]) >= 4
    # Explicitly provided 'prev_samples' should come first
    assert samples[0][:2] == [42.0, 24.0]

    # Benchmarks that raise exceptions should have a time of "None"
    assert times['time_secondary.TimeSecondary.time_exception'].result == [None]

    assert times['subdir.time_subdir.time_foo'].result != [None]

    if not ON_PYPY:
        # XXX: the memory benchmarks don't work on PyPy, since asizeof
        # is CPython-only
        assert times['mem_examples.mem_list'].result[0] > 1000

    assert times['time_secondary.track_value'].result == [42.0]
    assert times['time_secondary.track_value'].profile is not None

    assert isinstance(times['time_examples.time_with_warnings'].stderr, type(''))
    assert times['time_examples.time_with_warnings'].errcode != 0

    assert times['time_examples.TimeWithBadTimer.time_it'].result == [0.0]

    assert times['params_examples.track_param'].params == [[
        "<class 'benchmark.params_examples.ClassOne'>",
        "<class 'benchmark.params_examples.ClassTwo'>"]]
    assert times['params_examples.track_param'].result == [42, 42]

    assert times['params_examples.mem_param'].params == [['10', '20'], ['2', '3']]
    assert len(times['params_examples.mem_param'].result) == 2 * 2

    assert times['params_examples.ParamSuite.track_value'].params == [["'a'", "'b'", "'c'"]]
    assert times['params_examples.ParamSuite.track_value'].result == [1 + 0, 2 + 0, 3 + 0]

    assert isinstance(times['params_examples.TuningTest.time_it'].result[0], float)
    assert isinstance(times['params_examples.TuningTest.time_it'].result[1], float)

    assert isinstance(times['params_examples.time_skip'].result[0], float)
    assert isinstance(times['params_examples.time_skip'].result[1], float)
    assert util.is_nan(times['params_examples.time_skip'].result[2])

    assert times['peakmem_examples.peakmem_list'].result[0] >= 4 * 2**20

    assert times['cache_examples.ClassLevelSetup.track_example'].result == [500]
    assert times['cache_examples.ClassLevelSetup.track_example2'].result == [500]

    assert times['cache_examples.track_cache_foo'].result == [42]
    assert times['cache_examples.track_cache_bar'].result == [12]
    assert times['cache_examples.track_my_cache_foo'].result == [0]

    assert times['cache_examples.ClassLevelSetupFail.track_fail'].result == [None]
    assert 'raise RuntimeError()' in times['cache_examples.ClassLevelSetupFail.track_fail'].stderr

    assert times['cache_examples.ClassLevelCacheTimeout.track_fail'].result == [None]
    assert times['cache_examples.ClassLevelCacheTimeoutSuccess.track_success'].result == [0]

    assert times['cache_examples.time_fail_second_run'].result == [None]
    assert times['cache_examples.time_fail_second_run'].samples == [None]

    profile_path = join(six.text_type(tmpdir), 'test.profile')
    with open(profile_path, 'wb') as fd:
        fd.write(times['time_secondary.track_value'].profile)
    pstats.Stats(profile_path)

    # Check for running setup on each repeat (one extra run from profile).
    # The output would contain error messages if the asserts in the benchmark fail.
    expected = ["<%d>" % j for j in range(1, 12)]
    assert times['time_examples.TimeWithRepeat.time_it'].stderr.split() == expected

    # Calibration of iterations should not rerun setup
    expected = (['setup'] * 2, ['setup'] * 3)
    assert times['time_examples.TimeWithRepeatCalibrate.time_it'].stderr.split() in expected

    # Check tuple-form repeat attribute produced results
    assert 2 <= len(times['time_examples.time_auto_repeat'].samples[0]) <= 4

    # Check run time timestamps
    for name, result in times.items():
        assert result.started_at >= util.datetime_to_js_timestamp(start_timestamp)
        assert result.ended_at >= result.started_at
        assert result.ended_at <= util.datetime_to_js_timestamp(end_timestamp)