def benchmark_generate_balance():
    """
    Time ``generate_balance`` against ``generate_balance2`` on one random
    balanced sequence, with two bare-iteration baselines for reference.

    Each measurement gets its own label so the timerit reports can be told
    apart (previously every timer was labelled ``'time'``).

    NOTE(review): ``generate_balance2`` is not imported in this function; it
    is presumably defined at module level -- confirm.
    """
    import timerit
    from networkx.algorithms.string.balanced_sequence import random_balanced_sequence
    from networkx.algorithms.string.balanced_sequence import generate_balance

    seq, open_to_close = random_balanced_sequence(100, mode="paren")

    # One Timerit instance is enough: ``reset`` clears the previous
    # measurement and assigns the new label.
    ti = timerit.Timerit(100, bestof=10, verbose=2)

    for timer in ti.reset('generate_balance'):
        with timer:
            list(generate_balance(seq, open_to_close))

    for timer in ti.reset('generate_balance2'):
        with timer:
            list(generate_balance2(seq, open_to_close))

    # Baseline: cost of simply walking the sequence
    for timer in ti.reset('iterate seq'):
        with timer:
            for t in seq:
                pass

    # Baseline: cost of walking the sequence through enumerate
    for timer in ti.reset('enumerate seq'):
        with timer:
            for t in enumerate(seq):
                pass

    list(generate_balance2(seq, open_to_close))
def benchmark_math_vs_numpy():
    """
    Compare scalar (and tiny-array) math operations implemented by the
    ``math`` module against their ``numpy`` counterparts.

    Three groups are timed: ``isclose`` on a scalar, ``sqrt`` on a two-item
    array, and a table of unary operations on a scalar. The mean-time
    rankings of the last group are printed.
    """
    import math
    import numpy as np
    import timerit
    import ubelt as ub

    # --- isclose on a random scalar ---
    ti = timerit.Timerit(100000, bestof=100, verbose=2)
    for timer in ti.reset('np.isclose'):
        value = np.random.rand() * 1000
        with timer:
            np.isclose(value, 0)

    for timer in ti.reset('math.isclose'):
        value = np.random.rand() * 1000
        with timer:
            math.isclose(value, 0)

    # --- sqrt over a two element array ---
    ti = timerit.Timerit(100000, bestof=100, verbose=2)
    for timer in ti.reset('multiple np.sqrt'):
        values = np.random.rand(2) * 1000
        with timer:
            np.sqrt(values)

    for timer in ti.reset('multiple math.sqrt'):
        values = np.random.rand(2) * 1000
        with timer:
            [math.sqrt(item) for item in values]

    # --- table of unary operations on a random scalar ---
    operations = {
        'math.sin': math.sin,
        'np.sin': np.sin,
        'math.sqrt': math.sqrt,
        'np.sqrt': np.sqrt,
        'np.exp': np.exp,
        'math.exp': math.exp,
        'math.asin': math.asin,
        'np.arcsin': np.arcsin,
        'math.isclose-0': lambda x: math.isclose(x, 0),
        'np.isclose-0': lambda x: np.isclose(x, 0),
        'math.atan2-1': lambda x: math.atan2(x, 1),
        'np.atan2-1': lambda x: np.arctan2(x, 1),
    }
    ti = timerit.Timerit(100000, bestof=100, verbose=2)
    for opkey in operations:
        op = operations[opkey]
        for timer in ti.reset(opkey):
            value = np.random.rand()
            with timer:
                op(value)

    mean_rankings = ub.repr2(ti.rankings['mean'], nl=1, precision=9, align=':')
    print('ti.rankings = {}'.format(mean_rankings))
def bench_zip_vs_conditional():
    """
    Compare two ways of truncating an (infinite) iterable to ``max_num``
    items: an explicit index check with ``break`` versus zipping against a
    ``range``.
    """
    import itertools as it
    import timerit

    def clip_iterable_with_if(iterable, max_num):
        # Stop as soon as the running index reaches the limit
        for count, element in enumerate(iterable):
            if count >= max_num:
                break
            yield element

    def clip_iterable_with_zip(iterable, max_num):
        # zip stops when the range is exhausted
        for _count, element in zip(range(max_num), iterable):
            yield element

    max_num = 10000
    iterable = it.repeat(1)

    ti = timerit.Timerit(100, bestof=10, verbose=2)

    for timer in ti.reset('clip_iterable_with_if'):
        with timer:
            list(clip_iterable_with_if(iterable, max_num))

    # WINNER
    for timer in ti.reset('clip_iterable_with_zip'):
        with timer:
            list(clip_iterable_with_zip(iterable, max_num))
def main():
    """
    Is it faster to specify a dtype as a string or as ``np.float64``?

    The attribute lookup on ``np`` adds enough overhead to make it slower
    than the string form, but importing ``float64`` up front skips that
    lookup and does slightly better. This is all contrived; it is never the
    bottleneck.
    """
    import numpy as np
    import timerit
    import xarray
    from numpy import float64

    raw = np.empty(1, dtype=np.int16)
    data = xarray.DataArray(raw)

    ti = timerit.Timerit(30000, bestof=10, verbose=2)

    # dtype given as a string
    for timer in ti.reset('str-dtype'):
        with timer:
            data.astype('float64')

    # dtype fetched from the numpy namespace on every call
    for timer in ti.reset('access-raw-dtype'):
        with timer:
            data.astype(np.float64)

    # dtype already bound to a local name
    for timer in ti.reset('no-access-raw-dtype'):
        with timer:
            data.astype(float64)
def bench_set_membership():
    """
    Q: Is there a speedup to using a set when the set is small?

    A: There seems to be a small benefit, but maybe with more variance?

    Results:
        Timed best=356.988 ns, mean=391.727 ± 31.0 ns for small-set-membership
        Timed best=375.992 ns, mean=413.521 ± 21.9 ns for small-str-membership
        Timed best=362.983 ns, mean=415.807 ± 63.7 ns for small-list-membership
    """
    import random
    import timerit

    ti = timerit.Timerit(300000, bestof=30, verbose=1)
    rng = random.Random(0)
    lhs_candidates = 'biufcmMOSUV'

    # warmup, reduce variance?
    for i in range(100000):
        i += 1

    # The timed expressions below are intentionally written as literals:
    # the literal container is the thing being measured.
    for timer in ti.reset('small-set-membership'):
        lhs = rng.choice(lhs_candidates)
        with timer:
            lhs in {'i', 'u', 'b'}

    for timer in ti.reset('small-str-membership'):
        lhs = rng.choice(lhs_candidates)
        with timer:
            lhs in 'iub'

    for timer in ti.reset('small-list-membership'):
        lhs = rng.choice(lhs_candidates)
        with timer:
            lhs in ['i', 'u', 'b']
def main():
    """
    Time three workloads (``numpy_work``, ``opencv_io_work`` and
    ``opencv_cpu_io_work``) under serial, thread and process executors.

    For every workload the timerit measures are printed after all modes
    have run. The third workload is labelled ``opencv_cpu_io_work`` (it was
    previously mislabelled ``opencv_io_work``, which made it
    indistinguishable from the second workload in the report).
    """
    modes = ['serial', 'thread', 'process']
    max_workers = 8
    njobs = 100

    def _time_all_modes(ti, workname, func, *args):
        # Submit ``njobs`` copies of ``func(*args)`` under each executor mode
        # and time submission + completion (executor setup is not timed).
        for mode in modes:
            for timer in ti.reset('time {} {}'.format(workname, mode)):
                executor = util_futures.Executor(mode, max_workers=max_workers)
                with executor:
                    with timer:
                        fs = [executor.submit(func, *args)
                              for i in range(njobs)]
                        for f in futures.as_completed(fs):
                            f.result()
        print('ti.measures = {}'.format(
            ub.repr2(ti.measures, nl=2, precision=4)))

    ti = timerit.Timerit(6, bestof=2, verbose=3, unit='ms')
    _time_all_modes(ti, 'numpy_work', numpy_work)

    fpath = kwimage.grab_test_image_fpath()

    ti = timerit.Timerit(10, bestof=3, verbose=3, unit='ms')
    _time_all_modes(ti, 'opencv_io_work', opencv_io_work, fpath)

    ti = timerit.Timerit(10, bestof=3, verbose=3, unit='ms')
    _time_all_modes(ti, 'opencv_cpu_io_work', opencv_cpu_io_work, fpath)
def TIMERIT(label):
    """
    Return a new ``timerit.Timerit`` for ``label`` after reseeding ``rng``.

    ``rng``, ``num`` and ``timerit`` are taken from the enclosing scope.
    """
    # Reseed so that every timer run draws the same random numbers
    rng.seed(0)
    timer_kwargs = {
        'num': num,
        'bestof': 1,
        'label': label,
        # 'unit': 'us',
        'unit': 'ms',
    }
    return timerit.Timerit(**timer_kwargs)
def benchmark_multi_or_combined_import():
    """
    Combining all imports into a single line is slightly faster

    Spawns ``python -c`` with either one combined ``from os.path import``
    statement or one statement per name, and times the whole subprocess.
    Both variants run with ``check=True`` so that a failing command raises
    instead of being silently timed as if it had succeeded.
    """
    import timerit
    import ubelt as ub

    attr_names = [
        'altsep', 'basename', 'commonpath', 'commonprefix', 'curdir',
        'defpath', 'devnull', 'dirname', 'exists', 'expanduser', 'expandvars',
        'extsep', 'genericpath', 'getatime', 'getctime', 'getmtime',
        'getsize', 'isabs', 'isdir', 'isfile', 'islink', 'ismount', 'join',
        'lexists', 'normcase', 'normpath', 'os', 'pardir', 'pathsep',
        'realpath', 'relpath', 'samefile',
    ]

    combined_lines = 'from os.path import ' + ', '.join(attr_names)
    multi_lines = '; '.join(
        ['from os.path import ' + name for name in attr_names])

    ti = timerit.Timerit(10, bestof=3, verbose=2)

    for timer in ti.reset('combined_lines'):
        with timer:
            ub.cmd('python -c "{}"'.format(combined_lines), check=True)

    for timer in ti.reset('multi_lines'):
        with timer:
            ub.cmd('python -c "{}"'.format(multi_lines), check=True)
def bench_closures():
    """
    Is it faster to use a closure or pass in the variables explicitly?

    The matrix side length is read from ``--s`` (default 1) and the timerit
    verbosity from ``--verbose`` (default 1). Rankings, consistency and the
    average rank position of each variant are printed.
    """
    import ubelt as ub
    import timerit
    import numpy as np

    # Test a nested func with vs without a closure
    def rand_complex(*shape):
        # ``np.complex`` was only an alias of the builtin ``complex`` and was
        # removed in NumPy 1.24; use the builtin directly.
        real = np.random.rand(*shape).astype(complex)
        imag = np.random.rand(*shape).astype(complex) * 1j
        mat = real + imag
        return mat

    s = int(ub.argval('--s', default='1'))
    mat1 = rand_complex(s, s)
    mat2 = rand_complex(s, s)
    N = 1000
    offset = 100

    def nested_closure():
        # Reads mat1, mat2, N and offset from the enclosing scope
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    def nested_explicit(mat1, mat2, N, offset):
        # Same work, but every input is a local argument
        mat3 = mat1 @ mat2
        for i in range(N):
            mat3 += i + offset

    ti = timerit.Timerit(int(2**11), bestof=int(2**8),
                         verbose=int(ub.argval('--verbose', default='1')))

    for timer in ti.reset('nested_explicit'):
        with timer:
            nested_explicit(mat1, mat2, N, offset)

    for timer in ti.reset('nested_closure'):
        with timer:
            nested_closure()

    print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2)))
    print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2)))

    # Average the rank position of every label across all ranking metrics
    positions = ub.ddict(list)
    for m1, v1 in ti.rankings.items():
        for pos, label in enumerate(ub.argsort(v1), start=0):
            positions[label].append(pos)
    average_position = ub.map_vals(lambda x: sum(x) / len(x), positions)
    print('average_position = {}'.format(ub.repr2(average_position)))
def benchmark_loop_first_variants():
    """
    Time ``method_loop_first``, ``method_loop_first2`` and
    ``method_enumerate`` on integer lists of increasing length.

    Returns:
        list of dict: one row per (num_items, method) holding the keys
            ``num_items``, ``method_name`` and ``mean`` (mean time).
    """
    import timerit
    import ubelt as ub

    basis = {
        'num_items': [10, 100, 1000, 10000, 100000],
    }
    data_grid = ub.named_product(**basis)

    rows = []
    ti = timerit.Timerit(100, bestof=10, verbose=2)

    # (label, callable) pairs in the order they are benchmarked
    candidates = [
        ('loop_first', method_loop_first),
        ('loop_first2', method_loop_first2),
        ('enumerate', method_enumerate),
    ]

    for data_kw in data_grid:
        num_items = data_kw['num_items']
        items = list(range(num_items))

        for method_name, method in candidates:
            for timer in ti.reset(method_name):
                with timer:
                    method(items)
            rows.append({
                'num_items': num_items,
                'method_name': method_name,
                'mean': ti.mean()
            })

    print('ti.rankings = {}'.format(ub.repr2(ti.rankings, nl=2, align=':')))
    return rows
def main():
    """
    Time ``walk_with_scandir`` against ``walk_with_walk`` on ``/etc``.

    Each walker gets its own timer label (both were previously labelled
    ``'time'``), and the number of items found by each is printed so the
    two results can be compared.
    """
    import timerit
    import ubelt as ub

    root_dpath = ub.Path('/etc')

    ti = timerit.Timerit(100, bestof=10, verbose=2)

    for timer in ti.reset('walk_with_scandir'):
        with timer:
            items1 = sorted(walk_with_scandir(root_dpath))

    for timer in ti.reset('walk_with_walk'):
        with timer:
            items2 = sorted(walk_with_walk(root_dpath))

    # The original bare ``len(...)`` expressions had no effect outside an
    # interactive session; report the counts instead.
    print('len(items1) = {!r}'.format(len(items1)))
    print('len(items2) = {!r}'.format(len(items2)))
def mwe_check_before_select():
    """
    Measure how the cost of ``np.all(mask)`` compares with the cost of
    boolean-mask indexing three arrays, as the array length grows.

    For 100 log-spaced sizes between 1e1 and 1e7 both operations are timed
    and the mean times are drawn as a log-log seaborn line plot.
    """
    import numpy as np
    import timerit
    import ubelt as ub

    results = []
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
    # documented replacement.
    ns = np.logspace(1, 7, 100).astype(int)
    for n in ub.ProgIter(ns, desc='time-tradeoff', verbose=3):
        print('n = {!r}'.format(n))
        y_true = np.random.randint(0, 100, n).astype(np.int64)
        y_pred = np.random.randint(0, 100, n).astype(np.int64)
        sample_weight = np.random.rand(n)
        isvalid = np.random.rand(n) > 0.5

        ti = timerit.Timerit(9, bestof=3, verbose=2)

        # Cost of only checking whether every item is valid
        for timer in ti.reset('all-check'):
            with timer:
                np.all(isvalid)
        results.append({
            'n': n,
            'label': ti.label,
            'time': ti.mean(),
        })

        # Cost of unconditionally selecting the valid items
        for timer in ti.reset('all-index'):
            with timer:
                y_true[isvalid]
                y_pred[isvalid]
                sample_weight[isvalid]
        results.append({
            'n': n,
            'label': ti.label,
            'time': ti.mean(),
        })

    import pandas as pd
    df = pd.DataFrame(results)

    import kwplot
    import seaborn as sns
    kwplot.autoplt()
    sns.set()
    ax = sns.lineplot(data=df, x='n', y='time', hue='label')
    ax.set_yscale('log')
    ax.set_xscale('log')
def bench_dict_isect():
    """
    Profile and time ``ub.dict_isect`` against an inline comprehension and
    the ``dict_isect_variant0`` .. ``dict_isect_variant3`` alternatives.

    The ``dict_isect_variant2`` timer now really calls
    ``dict_isect_variant2`` (it used to call ``dict_isect_variant1``, so
    variant 2 was never measured). The ``min`` rankings are printed.
    """
    import timerit
    import ubelt as ub
    import xdev

    def random_dict(n):
        # Keys are random ints in [0, n]; duplicates collapse, so the dict
        # usually has fewer than ``n`` items.
        import random
        keys = set(random.randint(0, n) for _ in range(n))
        return {k: k for k in keys}

    d1 = random_dict(1000)
    d2 = random_dict(1000)

    xdev.profile_now(ub.dict_isect)(d1, d2)
    xdev.profile_now(dict_isect_variant0)(d1, d2)
    xdev.profile_now(dict_isect_variant1)(d1, d2)
    xdev.profile_now(dict_isect_variant2)(d1, d2)
    xdev.profile_now(dict_isect_variant3)(d1, d2)

    ti = timerit.Timerit(100, bestof=10, verbose=2)

    for timer in ti.reset('current'):
        with timer:
            ub.dict_isect(d1, d2)

    for timer in ti.reset('inline'):
        with timer:
            {k: v for k, v in d1.items() if k in d2}

    for timer in ti.reset('dict_isect_variant0'):
        with timer:
            dict_isect_variant0(d1, d2)

    for timer in ti.reset('dict_isect_variant1'):
        with timer:
            dict_isect_variant1(d1, d2)

    for timer in ti.reset('dict_isect_variant2'):
        with timer:
            dict_isect_variant2(d1, d2)

    for timer in ti.reset('dict_isect_variant3'):
        with timer:
            dict_isect_variant3(d1, d2)

    print('ti.rankings = {}'.format(
        ub.repr2(ti.rankings['min'], precision=8, align=':', nl=1, sort=0)))
def variant():
    """
    Time several ways of computing a dictionary difference: the items of
    the first dict whose keys do not appear in any of the other dicts.

    All variants are asserted to yield the same result as the original.
    """
    import random
    import timerit
    import ubelt as ub

    num_items = 100
    num_other = 1

    first_keys = [random.randint(0, 1000) for _ in range(num_items)]

    # Each removal set mixes keys known to be in the first dict with
    # additional random keys.
    remove_sets = []
    for _ in range(num_other):
        overlapping = random.choices(first_keys, k=10)
        extras = [random.randint(0, 1000) for _ in range(num_items)]
        remove_sets.append(list(ub.unique(overlapping + extras)))

    first_dict = {k: k for k in first_keys}
    args = [first_dict]
    args += [{k: k for k in ks} for ks in remove_sets]
    dictclass = dict

    ti = timerit.Timerit(100, bestof=10, verbose=2)

    for timer in ti.reset('orig'):
        with timer:
            keys = set(first_dict)
            keys.difference_update(*map(set, args[1:]))
            new0 = dictclass((k, first_dict[k]) for k in keys)

    for timer in ti.reset('alt1'):
        with timer:
            remove_keys = {k for ks in args[1:] for k in ks}
            new1 = dictclass(
                (k, v) for k, v in first_dict.items() if k not in remove_keys)

    for timer in ti.reset('alt2'):
        with timer:
            remove_keys = set.union(*map(set, args[1:]))
            new2 = dictclass(
                (k, v) for k, v in first_dict.items() if k not in remove_keys)

    for timer in ti.reset('alt3'):
        with timer:
            remove_keys = set.union(*map(set, args[1:]))
            new3 = dictclass(
                (k, first_dict[k]) for k in first_dict.keys()
                if k not in remove_keys)

    # Cannot use until 3.6 is dropped (it is faster)
    for timer in ti.reset('alt4'):
        with timer:
            remove_keys = set.union(*map(set, args[1:]))
            new4 = {k: v for k, v in first_dict.items()
                    if k not in remove_keys}

    assert new1 == new0
    assert new2 == new0
    assert new3 == new0
    assert new4 == new0
def benchmark():
    """
    Time hashing of 1000 demo files with ``ub.hash_file``: first in a plain
    loop, then through a ``JobPool`` in serial, thread and process mode.

    The demo files are written below ``/raid/data/tmp``.
    """
    import timerit
    import ubelt as ub
    from kwcoco.util.util_futures import JobPool  # NOQA

    ti = timerit.Timerit(3, bestof=1, verbose=2)
    max_workers = 4

    # Choose a path to an HDD
    dpath = ub.ensuredir('/raid/data/tmp')

    fpath_demodata = _demodata_files(dpath=dpath, num_files=1000,
                                     size_pool=[10, 20, 50], pool_size=8)

    # Baseline: no job pool at all
    for timer in ti.reset('hash_file(hasher=xx64)'):
        with timer:
            for fpath in fpath_demodata:
                ub.hash_file(fpath, hasher='xx64')

    # (timer label, pool mode, hasher name), in benchmark order
    pool_configs = [
        ('hash_file(hasher=xxhash) - serial', 'serial', 'xxhash'),
        ('hash_file(hasher=xxhash) - thread', 'thread', 'xx64'),
        ('hash_file(hasher=xxhash) - process', 'process', 'xx64'),
    ]
    for label, mode, hasher in pool_configs:
        for timer in ti.reset(label):
            # Pool construction is not part of the measurement
            jobs = JobPool(mode=mode, max_workers=max_workers)
            with timer:
                for fpath in fpath_demodata:
                    jobs.submit(ub.hash_file, fpath, hasher=hasher)
                results = [job.result() for job in jobs.jobs]
def bench_notnot_vs_bool():
    """
    Compare ``not not x`` with ``bool(x)`` for converting to a boolean.

    References:
        https://www.youtube.com/watch?v=9gEX7jesV34
    """
    import timerit

    value = 42
    ti = timerit.Timerit(1000000, bestof=10, verbose=3, unit='ns')

    for timer in ti.reset('not not'):
        with timer:
            via_double_not = not not value

    for timer in ti.reset('bool'):
        with timer:
            via_bool = bool(value)

    print('result1 = {!r}'.format(via_double_not))
    print('result2 = {!r}'.format(via_bool))
def main():
    """
    Time loading the skimage sample images with the concurrent, asyncio
    (pure python), asyncio (uvloop) and serial loaders.

    The sample data is downloaded first and the list of image paths is
    repeated 15 times to make the workload larger.
    """
    import timerit
    from os.path import join
    from skimage import data as skimage_data
    from skimage.data import image_fetcher

    skimage_data.download_all()

    image_exts = ('.tif', '.png', '.jpg')
    fpaths = []
    for fname in image_fetcher.registry.keys():
        if fname.endswith(image_exts):
            fpaths.append(join(image_fetcher.path, fname))

    # Load a lot of files
    fpaths = fpaths * 15

    if 0:
        # Sanity check
        for loader in (load_serial, load_concurrent, load_asyncio_pure_python):
            counts, images = zip(*list(loader(fpaths)))
            print('counts = {!r}'.format(counts))

    ti = timerit.Timerit(50, bestof=3, verbose=1)

    # (timer label, loader) in the order they are benchmarked
    benchmarks = [
        ('concurrent', load_concurrent),
        ('load_asyncio_pure_python', load_asyncio_pure_python),
        ('load_asyncio_with_uvloop', load_asyncio_with_uvloop),
        ('serial', load_serial),
    ]
    for label, loader in benchmarks:
        for timer in ti.reset(label):
            with timer:
                list(loader(fpaths))
def test_manager():
    """
    Look at how managers works

    Times ``.keys()`` on a ``multiprocessing.Manager`` dict proxy against
    the same call on the plain ``dset.imgs`` dict. The manager is used as a
    context manager so its server process is shut down when the benchmark
    finishes, and each timer has its own label (both used to be ``'time'``).
    """
    from multiprocessing import Manager
    import kwcoco
    import timerit

    dset = kwcoco.CocoDataset.coerce('shapes32')

    with Manager() as manager:
        managed_imgs = manager.dict(dset.imgs)

        ti = timerit.Timerit(100, bestof=10, verbose=2)

        # Every call on the proxy is forwarded to the manager process
        for timer in ti.reset('managed_imgs.keys()'):
            with timer:
                managed_imgs.keys()

        for timer in ti.reset('dset.imgs.keys()'):
            with timer:
                dset.imgs.keys()
def main():
    """
    Time string equality against a random 20 character string for inputs
    that differ at the first, middle or last position, are too long, are
    too short, or are equal.

    The ``min`` rankings are printed at the end.
    """
    import timerit
    import ubelt as ub
    import random
    import string

    # expected = "58178059833426840615453390153965"
    length = 20
    expected = ''.join(random.choices(string.printable, k=length))

    def flip_char(text, pos):
        """Return ``text`` with the character at ``pos`` replaced by a
        different random printable character (the length is unchanged)."""
        old = text[pos]
        new = random.choice(string.printable)
        # Re-draw until the replacement really differs. The previous loop
        # body was ``pass`` and therefore never terminated on a collision.
        while new == old:
            new = random.choice(string.printable)
        # Replace exactly one character. The previous slicing
        # (text[:pos - 1] and text[pos:]) kept the old character and
        # changed the length, e.g. pos=0 nearly doubled the string.
        return text[:pos] + new + text[pos + 1:]

    variants = dict(
        ne_first=flip_char(expected, 0),
        ne_mid=flip_char(expected, length // 2),
        ne_last=flip_char(expected, length - 1),
        too_long='F' * len(expected) * 10,
        too_short='F',
        correct=expected,
    )

    ti = timerit.Timerit(10000000, bestof=10, verbose=2)
    for key, value in variants.items():
        # No explicit ``with timer``: the comparison is the whole loop body
        for _ in ti.reset(key):
            value == expected

    print('ti.rankings = {}'.format(
        ub.repr2(ti.rankings['min'], nl=2, align=':')))
def _benchmark_distinguish_tensor_ndarray():
    """
    Time three type checks (``torch.is_tensor`` and ``isinstance`` against
    ``np.ndarray`` / ``torch.Tensor``) on a numpy array and on a torch
    tensor, summing the mean time of each check over both inputs.
    """
    import timerit

    ti = timerit.Timerit(10000, bestof=1000, verbose=1)

    array = np.arange(100)
    tensor = torch.arange(100)

    # Accumulated mean time per label; missing labels start at zero
    totals = ub.ddict(int)

    for data in (array, tensor):
        for timer in ti.reset(label='is_tensor(data)'):
            with timer:
                torch.is_tensor(data)
        totals[ti.label] += ti.mean()

        for timer in ti.reset(label='isinstance(data, np.ndarray)'):
            with timer:
                isinstance(data, np.ndarray)
        totals[ti.label] += ti.mean()

        for timer in ti.reset(label='isinstance(data, torch.Tensor)'):
            with timer:
                isinstance(data, torch.Tensor)
        totals[ti.label] += ti.mean()
def bench_hashfile_blocksize():
    """
    Test speed of hashing with various blocksize strategies

    A random file is written below ``$HOME/raid/data/tmp`` and hashed with
    xx64 using three read strategies: a constant blocksize, a blocksize
    that doubles on every read, and a doubling blocksize capped at 16 MiB.
    """
    import os
    import timerit
    from ubelt.util_hash import _rectify_hasher

    dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))

    size_pool = [10000]
    rng = random.Random(0)

    # Create a pool of random chunks of data
    chunksize = int(2 ** 20)
    pool_size = 8
    part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)]

    # Write a big file (~600 MB)
    fpath = _write_random_file(dpath, part_pool, size_pool, rng)

    size_mb = os.stat(fpath).st_size / 1e6
    print('file size = {!r} MB'.format(size_mb))

    hasher_algo = 'xx64'
    ti = timerit.Timerit(4, bestof=2, verbose=2)
    results = []

    # Constant blocksize is the winner as long as its chosen right.
    for timer in ti.reset('constant blocksize'):
        blocksize = int(2 ** 20)
        hasher = _rectify_hasher(hasher_algo)()
        with timer:
            with open(fpath, 'rb') as file:
                buf = file.read(blocksize)
                while buf:
                    hasher.update(buf)
                    buf = file.read(blocksize)
            result = hasher.hexdigest()
        results.append(result)

    for timer in ti.reset('double blocksize'):
        blocksize = int(2 ** 20)
        hasher = _rectify_hasher(hasher_algo)()
        with timer:
            with open(fpath, 'rb') as file:
                buf = file.read(blocksize)
                while buf:
                    hasher.update(buf)
                    # Every read asks for twice as much as the previous one
                    blocksize *= 2
                    buf = file.read(blocksize)
            result = hasher.hexdigest()
        results.append(result)

    for timer in ti.reset('double blocksize + limit'):
        max_blocksize = int(2 ** 20) * 16
        blocksize = int(2 ** 20)
        hasher = _rectify_hasher(hasher_algo)()
        with timer:
            with open(fpath, 'rb') as file:
                buf = file.read(blocksize)
                while buf:
                    hasher.update(buf)
                    # Double the request size, but never past the cap
                    blocksize = min(2 * blocksize, max_blocksize)
                    buf = file.read(blocksize)
            result = hasher.hexdigest()
        results.append(result)
def bench_find_optimal_blocksize():
    r"""
    This function can help find the optimal blocksize for your use case.

    Notes:

        # Usage
        cd ~/code/ubelt/dev
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --dpath <PATH-TO-HDD-OR-SSD> \
            --size <INT-IN-MB> \
            --hash_algo <ALGO_NAME> \

        # Benchmark on an HDD
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/raid/data/tmp \
            --hash_algo xx64

        # Benchmark on an SSD
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/.cache/ubelt/tmp \
            --hash_algo xx64

        # Test a small file
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 1 \
            --dpath $HOME/.cache/ubelt/tmp \
            --hash_algo xx64

        Throughout our tests on SSDs / HDDs with small and large files
        we are finding a chunksize of 2 ** 20 consistently working best with
        xx64.

        # Test with a slower hash algo
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/raid/data/tmp \
            --hash_algo sha1

        Even that shows 2 ** 20 working well.
    """
    import os
    import numpy as np
    import timerit

    dpath = ub.argval('--dpath', default=None)
    if dpath is not None:
        ub.ensuredir(dpath)
    else:
        # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
        dpath = ub.ensure_app_cache_dir('ubelt/hash_test')
    print('dpath = {!r}'.format(dpath))

    target_size = int(ub.argval('--size', default=600))
    hash_algo = ub.argval('--hash_algo', default='xx64')
    print('hash_algo = {!r}'.format(hash_algo))
    print('target_size = {!r}'.format(target_size))

    # Write a big file (~600 MB)
    MB = int(2 ** 20)
    size_pool = [target_size]
    rng = random.Random(0)
    # pool_size = max(target_size // 2, 1)
    # pool_size = max(1, target_size // 10)
    pool_size = 8
    part_pool = [_random_data(rng, MB) for _ in range(pool_size)]
    fpath = _write_random_file(dpath, part_pool, size_pool, rng)
    print('fpath = {!r}'.format(fpath))

    size_mb = os.stat(fpath).st_size / MB
    print('file size = {!r} MB'.format(size_mb))

    ti = timerit.Timerit(4, bestof=2, verbose=2)
    results = []

    # Find an optimal constant blocksize
    min_power = 16
    max_power = 24
    blocksize_candidates = [int(2 ** e) for e in range(min_power, max_power)]

    for blocksize in blocksize_candidates:
        label = 'constant blocksize=2 ** {} = {}'.format(
            np.log2(float(blocksize)), blocksize)
        # No explicit ``with timer``: the hash call is the whole loop body
        for timer in ti.reset(label):
            result = ub.hash_file(fpath, blocksize=blocksize, hasher=hash_algo)
        results.append(result)

    print('ti.rankings = {}'.format(ub.repr2(ti.rankings, nl=2, align=':')))
    # Every blocksize must yield the same digest
    assert ub.allsame(results)
def benchmark_template():
    """
    Template for a parameterized benchmark.

    Runs every combination of the values in ``basis`` through the selected
    method, collects timings into a long-form pandas DataFrame, prints
    min / mean statistics plus relative speedups, aggregates an openskill
    rating per method, and draws a seaborn line plot of the timings.
    """
    import ubelt as ub
    import pandas as pd
    import timerit

    def method1(x, y, z):
        # Build the list with an explicit append loop
        ret = []
        for i in range((x + y) * z):
            ret.append(i)
        return ret

    def method2(x, y, z):
        # Build the same list with a comprehension
        ret = [i for i in range((x + y) * z)]
        return ret

    # Maps local names (including method1 / method2) to their objects
    method_lut = locals()  # can populate this some other way

    # Change params here to modify number of trials
    ti = timerit.Timerit(100, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': ['method1', 'method2'],
        'x': list(range(7)),
        'y': [0, 100],
        'z': [2, 3]
        # 'param_name': [param values],
    }
    xlabel = 'x'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = ['x', 'y', 'z']
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['y'],
        'size': ['z'],
    }
    # The hue group gets every basis key that is neither the x-axis nor
    # already assigned to another group.
    group_labels['hue'] = list((ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        # One compact string key per plot group (e.g. 'style_key')
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(ub.dict_isect(
                params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            chunk_iter = ub.chunks(ti.times, ti.bestof)
            # Minimum of every chunk of ``ti.bestof`` recorded times
            times = list(map(min, chunk_iter))  # TODO: timerit method for this
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time' ]].mean().rename({'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    # Group by every remaining column, i.e. everything except the method
    # name, the timing columns and the derived key columns.
    other_keys = sorted(
        set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        # Order the variants of this group by ascending mean time
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        # Speedup of each variant relative to the slowest one in the group
        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and each
            # "method" is a player. We rank the players by which is fastest,
            # and update their ranking according to the Weng-Lin Bayes ranking
            # model. This does not take the fact that some "games" (i.e.
            # parameter settings) are more important than others, but it should
            # be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False)
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        # Only pass the seaborn grouping kwargs that have labels assigned
        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark Name')
        ax.set_xlabel('Size (todo: A better x-variable description)')
        ax.set_ylabel('Time (todo: A better y-variable description)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')

        # Only call plt.show() when the __IPYTHON__ name is not defined
        try:
            __IPYTHON__
        except NameError:
            plt.show()
def bench_dict_row_vs_col():
    """
    Check how long it takes to access attributes when we store them as
    either List[Dict] or Dict[List].

    In other words the strategies to store data are either

    Row Major:

        [
            {'col_1': <item_0_0>, 'col_2': <item_1_0>, 'col_3': <item_2_0>},
            {'col_1': <item_0_1>, 'col_2': <item_1_1>, 'col_3': <item_2_1>},
            {'col_1': <item_0_2>, 'col_2': <item_1_2>, 'col_3': <item_2_2>},
            ...
        ]

    Column Major:

        {
            'col_1': [<item_0_0>, <item_1_0>, <item_2_0>, ...],
            'col_2': [<item_0_1>, <item_1_1>, <item_2_1>, ...],
            'col_3': [<item_0_2>, <item_1_2>, <item_2_2>, ...],
        }

    Conclusion:
        Using a Dictionary of Lists (i.e. column based data) is between
        2x and 4x faster in these tests.

    Timer labels are suffixed with ``(RM-C)`` when the row-major structure
    is read and ``(CM-F)`` when the column-major structure is read. The
    final "precache row" timer reads the row-major structure and is now
    labelled accordingly (it used to carry the ``(CM-F)`` suffix).
    """
    import random
    import timerit

    rng = random.Random()
    ncols = int(3)
    nrows = int(1e5)

    def random_item(rng):
        # Between 1 and 10 random bytes
        item = rng.randbytes(rng.randint(1, 10))
        return item

    col_names = ['col_{:03d}'.format(c) for c in range(ncols)]
    row_major = [{col: random_item(rng) for col in col_names}
                 for rx in range(nrows)]
    column_major = {col: [row[col] for row in row_major]
                    for col in col_names}

    ti = timerit.Timerit(100, bestof=10, verbose=1)

    col = col_names[rng.randint(0, len(col_names) - 1)]

    for timer in ti.reset('iterate-one-column (RM-C)'):
        with timer:
            result1 = [row[col] for row in row_major]

    for timer in ti.reset('iterate-one-column (CM-F)'):
        with timer:
            result2 = [item for item in column_major[col]]
    assert result1 == result2

    for timer in ti.reset('iterate-all-columns (RM-C)'):
        with timer:
            result1 = [[row[col] for col in col_names] for row in row_major]

    for timer in ti.reset('iterate-all-columns (CM-F)'):
        with timer:
            result2 = [
                list(items)
                for items in zip(*[column_major[c] for c in col_names])
            ]
    assert result1 == result2

    for timer in ti.reset('column-to-row-based'):
        with timer:
            row_major2 = [
                dict(zip(col_names, items))
                for items in zip(*[column_major[c] for c in col_names])
            ]
    assert row_major2 == row_major

    for timer in ti.reset('row-to-column-based'):
        with timer:
            col_major2 = {
                col: [row[col] for row in row_major]
                for col in col_names
            }
    assert col_major2 == column_major

    # No real difference for single item access
    ti = timerit.Timerit(100000, bestof=10, verbose=1)
    row = rng.randint(0, nrows - 1)

    for timer in ti.reset('access-one-item (RM-C)'):
        with timer:
            result1 = row_major[row][col]

    for timer in ti.reset('access-one-item (CM-F)'):
        with timer:
            result2 = column_major[col][row]
    assert result1 == result2

    for timer in ti.reset('access-one-item (precache col) (CM-F)'):
        # The column lookup happens outside the timed region
        column = column_major[col]
        with timer:
            result2 = column[row]

    for timer in ti.reset('access-one-item (precache row) (RM-C)'):
        # The row lookup happens outside the timed region
        rowitems = row_major[row]
        with timer:
            result2 = rowitems[col]
def bench_local_versus_global_import():
    """
    Write two python files that loop over a test function that uses some
    external module.

    One version imports the dependency globally at startup, the other does a
    lazy import of the module inside the function.

    We time how long this takes over several tests where we vary the
    number of times this inner function is looped over (and thus the number
    of times we will run over the lazy import versus accessing the global
    import).

    It should be noted that startup time of the interpreter will play a
    considerable role in these measurements. Any ideas for mitigating that
    would be appreciated.

    Fixes relative to the previous version: the loop count written into the
    generated scripts now follows the swept ``num`` (it was hard-coded to
    100000), the scripts call ``main()`` so the loop actually runs (they
    used to call ``testfunc()`` once), and the sweep no longer lists 1000
    twice.
    """
    import ubelt as ub
    from os.path import join
    import timerit

    ti = timerit.Timerit(30, bestof=3, verbose=2)

    dpath = ub.ensure_app_cache_dir('ubelt/bench')
    local_modpath = join(dpath, 'local_import_test.py')
    global_modpath = join(dpath, 'global_import_test.py')

    for num in [0, 1, 10, 100, 1000, 10000]:
        fmtkw = {
            # 'modname': 'numpy',
            # 'attrname': 'array',
            # 'modname': 'ubelt',
            # 'attrname': 'take',
            'modname': 'networkx',
            'attrname': 'Graph',
            'num': num,
        }

        global_codeblock = ub.codeblock(
            '''
            import {modname}


            def testfunc():
                return {modname}.{attrname}

            def main():
                for _ in range({num}):
                    testfunc()

            if __name__ == '__main__':
                main()
            ''').format(**fmtkw)

        local_codeblock = ub.codeblock(
            '''

            def testfunc():
                import {modname}
                return {modname}.{attrname}

            def main():
                for _ in range({num}):
                    testfunc()

            if __name__ == '__main__':
                main()
            ''').format(**fmtkw)

        ub.writeto(local_modpath, local_codeblock)
        ub.writeto(global_modpath, global_codeblock)

        # Untimed warmup run before measuring
        ub.cmd('python ' + global_modpath)

        for timer in ti.reset('local imports @ {}'.format(num)):
            with timer:
                ub.cmd('python ' + local_modpath)

        for timer in ti.reset('global imports @ {}'.format(num)):
            with timer:
                ub.cmd('python ' + global_modpath)

    print('ti.rankings = {}'.format(
        ub.repr2(ti.rankings, nl=2, precision=4, align=':')))
def benchmark_repeat_vs_reduce_mul():
    """
    Benchmark several ways of multiplying a list of equally sized arrays.

    The compared strategies are: a hard-coded ``a * b * c ...`` expression,
    ``np.multiply.reduce``, stacking followed by ``prod(axis=0)``, and a
    recursive divide-and-conquer reduction. Results are gathered into a
    long-form pandas DataFrame, printed, and plotted with seaborn (through
    kwplot).
    """
    import functools
    import operator
    import numpy as np
    import ubelt as ub
    import pandas as pd
    import timerit

    def reduce_daq_rec(func, arrs):
        """
        Recursive divide-and-conquer reduction of ``arrs`` with ``func``.

        The input is split into its even-indexed and odd-indexed items,
        each half is reduced recursively, and the two partial results are
        combined with ``func``.

        Raises:
            ValueError: if ``arrs`` is empty (there is nothing to reduce).
        """
        num = len(arrs)
        if num == 0:
            raise ValueError('cannot reduce an empty sequence')
        if num == 1:
            return arrs[0]
        if num == 2:
            return func(arrs[0], arrs[1])
        if num == 3:
            # The last item of a length-3 input is index 2.
            return func(func(arrs[0], arrs[1]), arrs[2])
        res1 = reduce_daq_rec(func, arrs[0::2])
        res2 = reduce_daq_rec(func, arrs[1::2])
        return func(res1, res2)

    def reduce_daq_iter(func, arrs):
        """
        Iterative equivalent of :func:`reduce_daq_rec` using explicit stacks.

        References:
            https://www.baeldung.com/cs/convert-recursion-to-iteration
            https://stackoverflow.com/questions/159590/way-to-go-from-recursion-to-iteration

        Example input: ``arrs = [2, 3, 5, 7, 11, 13, 17, 21]``

        Raises:
            ValueError: if ``arrs`` is empty.
        """
        if len(arrs) == 0:
            raise ValueError('cannot reduce an empty sequence')
        # Marker meaning "combine the two most recent partial results".
        combine = object()
        pending = [arrs]
        partials = []
        while pending:
            item = pending.pop()
            if item is combine:
                right = partials.pop()
                left = partials.pop()
                partials.append(func(left, right))
            elif len(item) == 1:
                partials.append(item[0])
            elif len(item) == 2:
                partials.append(func(item[0], item[1]))
            elif len(item) == 3:
                partials.append(func(func(item[0], item[1]), item[2]))
            else:
                # Pushed in reverse so the even half is reduced first, then
                # the odd half, and finally the two results are combined.
                pending.append(combine)
                pending.append(item[1::2])
                pending.append(item[0::2])
        return partials[0]

    def method_daq_rec(arrs):
        return reduce_daq_rec(np.multiply, arrs)

    def method_repeat(arrs):
        """
        Multiply with a literal ``a * b * ...`` expression.

        helper code used to generate the hard-coded branches:
            arr_names = ['a{:02d}'.format(idx) for idx in range(1, 32 + 1)]
            lhs = ', '.join(arr_names)
            rhs = ' * '.join(arr_names)
            print(f'{lhs} = arrs')
            print(f'ret = {rhs}')
        """
        # Hard coded pure python syntax for multiplying
        if len(arrs) == 4:
            a01, a02, a03, a04 = arrs
            ret = a01 * a02 * a03 * a04
        elif len(arrs) == 8:
            a01, a02, a03, a04, a05, a06, a07, a08 = arrs
            ret = a01 * a02 * a03 * a04 * a05 * a06 * a07 * a08
        elif len(arrs) == 32:
            (a01, a02, a03, a04, a05, a06, a07, a08,
             a09, a10, a11, a12, a13, a14, a15, a16,
             a17, a18, a19, a20, a21, a22, a23, a24,
             a25, a26, a27, a28, a29, a30, a31, a32) = arrs
            ret = (a01 * a02 * a03 * a04 * a05 * a06 * a07 * a08 *
                   a09 * a10 * a11 * a12 * a13 * a14 * a15 * a16 *
                   a17 * a18 * a19 * a20 * a21 * a22 * a23 * a24 *
                   a25 * a26 * a27 * a28 * a29 * a30 * a31 * a32)
        else:
            # No hard-coded expression for this count: fall back to a
            # left-to-right fold so the result is still defined.
            ret = functools.reduce(operator.mul, arrs)
        return ret

    def method_reduce(arrs):
        ret = np.multiply.reduce(arrs)
        return ret

    def method_stack(arrs):
        stacked = np.stack(arrs)
        ret = stacked.prod(axis=0)
        return ret

    # Explicit name -> callable table for the methods listed in the basis.
    method_lut = {
        'method_repeat': method_repeat,
        'method_reduce': method_reduce,
        'method_stack': method_stack,
        'method_daq_rec': method_daq_rec,
    }

    ti = timerit.Timerit(10000, bestof=10, verbose=2)

    basis = {
        'method': ['method_repeat', 'method_reduce', 'method_stack',
                   'method_daq_rec'],
        'arr_size': [10, 100, 1000, 10000],
        'num_arrs': [4, 8, 32],
    }
    xlabel = 'arr_size'
    kw_labels = []
    group_labels = {
        'style': ['num_arrs'],
        'size': [],
    }
    # Every basis key that is neither the x-axis nor in another group is
    # used for the hue.
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) -
        set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        arr_size = params['arr_size']
        num_arrs = params['num_arrs']
        kwargs['arrs'] = [np.random.rand(arr_size) for _ in range(num_arrs)]
        method = method_lut[params['method']]

        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        row = {
            'mean': ti.mean(),
            'min': ti.min(),
            'key': key,
            **group_keys,
            **params,
        }
        rows.append(row)

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values('min')
    print(data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax,
                     **plotkw)
        ax.set_title('Benchmark')
        ax.set_xlabel('Array Size')
        ax.set_ylabel('Time')
def run_benchmark_renormalization():
    """
    See if we can renormalize probabilities after update with a faster
    method that maintains memory a bit better

    The pure-python demos are line-profiled once, then a selection of
    python / cython implementations is timed over a grid of ``T`` and ``D``
    values (only combinations with ``T <= D`` are kept) and the timings are
    plotted and summarized.

    Example:
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.expandpath('~/misc/tests/python'))
        >>> from bench_renormalization import *  # NOQA
        >>> run_benchmark_renormalization()
    """
    import ubelt as ub
    import xdev
    import pathlib
    import timerit

    fpath = pathlib.Path(
        '~/misc/tests/python/renormalize_cython.pyx').expanduser()
    renormalize_cython = xdev.import_module_from_pyx(
        fpath, annotate=True, verbose=3, recompile=True)

    xdev.profile_now(renormalize_demo_v1)(1000, 100)
    xdev.profile_now(renormalize_demo_v2)(1000, 100)
    xdev.profile_now(renormalize_demo_v3)(1000, 100)
    xdev.profile_now(renormalize_demo_v4)(1000, 100)

    func_list = [
        # renormalize_demo_v1,
        renormalize_demo_v2,
        # renormalize_demo_v3,
        # renormalize_demo_v4,
        renormalize_cython.renormalize_demo_cython_v1,
        renormalize_cython.renormalize_demo_cython_v2,
        renormalize_cython.renormalize_demo_cython_v3,
    ]
    methods = {f.__name__: f for f in func_list}

    # Single-shot timing of each method as a quick sanity check.
    for key, method in methods.items():
        with timerit.Timer(label=key, verbose=0) as t:
            method(1000, 100)
        print(f'{key:<30} {t.toc():0.6f}')

    arg_basis = {
        'T': [10, 20, 30, 50],
        'D': [10, 50, 100, 300],
    }
    args_grid = []
    for argkw in ub.named_product(arg_basis):
        if argkw['T'] <= argkw['D']:
            # Record the problem size on the row itself; it is stripped
            # again below before the method is called.
            argkw['size'] = argkw['T'] * argkw['D']
            args_grid.append(argkw)

    ti = timerit.Timerit(100, bestof=10, verbose=2)

    measures = []
    for method_name, method in methods.items():
        for argkw in args_grid:
            row = ub.dict_union({'method': method_name}, argkw)
            key = ub.repr2(row, compact=1)
            argkey = ub.repr2(argkw, compact=1)

            # Only T and D are real arguments of the benchmarked methods.
            kwargs = ub.dict_subset(argkw, ['T', 'D'])
            for timer in ti.reset(key):
                with timer:
                    method(**kwargs)

            row['mean'] = ti.mean()
            row['min'] = ti.min()
            row['key'] = key
            row['argkey'] = argkey
            measures.append(row)

    import pandas as pd
    df = pd.DataFrame(measures)
    import kwplot
    sns = kwplot.autosns()

    kwplot.figure(fnum=1, pnum=(1, 2, 1), docla=True)
    sns.lineplot(data=df, x='D', y='min', hue='method', style='method')
    kwplot.figure(fnum=1, pnum=(1, 2, 2), docla=True)
    sns.lineplot(data=df, x='T', y='min', hue='method', style='method')

    # Keyword arguments: DataFrame.pivot is keyword-only in pandas >= 2.0.
    p = df.pivot(index='method', columns='argkey', values='mean')
    print(p.mean(axis=1).sort_values())
def reorder_axes(self, new_order, inplace=False):
    """
    Return coordinates whose trailing-axis columns follow ``new_order``.

    Args:
        new_order (Tuple[int]): ``new_order[i]`` names the column of the
            original data that ends up at position ``i`` of the result.

        inplace (bool, default=False): when True ``self`` is modified and
            returned, otherwise a new object built from a copy of the data
            (and the same ``meta``) is returned.

    Returns:
        Coords: the reordered coordinates

    Note:
        The reordering applies to the "columns" of the last numpy axis,
        not to the numpy axes themselves.

    Example:
        >>> from kwimage.structs.coords import *  # NOQA
        >>> self = Coords(data=np.array([
        >>>     [7, 11],
        >>>     [13, 17],
        >>>     [21, 23],
        >>> ]))
        >>> new = self.reorder_axes((1, 0))
        >>> print('new = {!r}'.format(new))
        new = <Coords(data=
            array([[11,  7],
                   [17, 13],
                   [23, 21]]))>

    Example:
        >>> from kwimage.structs.coords import *  # NOQA
        >>> self = Coords.random(10, rng=0)
        >>> new = self.reorder_axes((1, 0))
        >>> # Remapping using 1, 0 reverses the axes
        >>> assert np.all(new.data[:, 0] == self.data[:, 1])
        >>> assert np.all(new.data[:, 1] == self.data[:, 0])
        >>> # Remapping using 0, 1 does nothing
        >>> eye = self.reorder_axes((0, 1))
        >>> assert np.all(eye.data == self.data)
        >>> # Remapping using 0, 0, destroys the 1-th column
        >>> bad = self.reorder_axes((0, 0))
        >>> assert np.all(bad.data[:, 0] == self.data[:, 0])
        >>> assert np.all(bad.data[:, 1] == self.data[:, 0])
    """
    backend = self._impl
    if inplace:
        result = self
    else:
        result = self.__class__(backend.copy(self.data), self.meta)

    # Active strategy: index the last axis and rebind ``data`` to the
    # indexed result. (Per the original author, the flags / contiguity of
    # the resulting array may differ from the input.)
    result.data = result.data[..., new_order]

    # Disabled alternative: write the reordered values back into the
    # existing buffer (``data[..., :] = data[..., new_order]``), which
    # keeps the original array object and therefore its flags.

    if False:
        # Disabled benchmark comparing the two strategies, both for
        # applying the reordering and for using the reordered data
        # afterwards. Rebinding seemed to have the best default behavior.
        import timerit
        ti = timerit.Timerit(100, bestof=10, verbose=2)

        for timer in ti.reset('method2-apply'):
            trial = self.copy()
            with timer:
                trial.data[..., :] = trial.data[..., new_order]

        for timer in ti.reset('method1-apply'):
            trial = self.copy()
            with timer:
                trial.data = trial.data[..., new_order]

        for timer in ti.reset('method2-use'):
            trial = self.copy()
            trial.data[..., :] = trial.data[..., new_order]
            with timer:
                trial.data += 10

        for timer in ti.reset('method1-use'):
            trial = self.copy()
            trial.data = trial.data[..., new_order]
            with timer:
                trial.data += 10

    return result
def benchmark():
    """
    Time several ways of hashing the demo files.

    Compares ``ub.hash_file`` with different hashers, the same hashing
    submitted through a ``JobPool`` in serial / thread / process mode, and
    shelling out to the ``xxh32sum`` / ``xxh64sum`` / ``sha1sum`` command
    line tools.

    Requirements:
        apt-get install xxhash
    """
    import timerit
    import ubelt as ub
    from kwcoco.util.util_futures import JobPool  # NOQA

    ti = timerit.Timerit(1, bestof=1, verbose=3)
    max_workers = 6

    fpath_demodata = _demodata_files()

    for timer in ti.reset('hash_file(hasher=xx32)'):
        with timer:
            for fpath in fpath_demodata:
                ub.hash_file(fpath, hasher='xx32')

    for timer in ti.reset('hash_file(hasher=xx64)'):
        with timer:
            for fpath in fpath_demodata:
                ub.hash_file(fpath, hasher='xx64')

    # The three JobPool variants use the same hasher so that only the
    # execution mode differs between them.
    for timer in ti.reset('hash_file(hasher=xxhash) - serial'):
        jobs = JobPool(mode='serial', max_workers=max_workers)
        with timer:
            for fpath in fpath_demodata:
                jobs.submit(ub.hash_file, fpath, hasher='xx64')
            # Block until every job has finished.
            results = [job.result() for job in jobs.jobs]

    for timer in ti.reset('hash_file(hasher=xxhash) - thread'):
        jobs = JobPool(mode='thread', max_workers=max_workers)
        with timer:
            for fpath in fpath_demodata:
                jobs.submit(ub.hash_file, fpath, hasher='xx64')
            results = [job.result() for job in jobs.jobs]

    for timer in ti.reset('hash_file(hasher=xxhash) - process'):
        jobs = JobPool(mode='process', max_workers=max_workers)
        with timer:
            for fpath in fpath_demodata:
                jobs.submit(ub.hash_file, fpath, hasher='xx64')
            results = [job.result() for job in jobs.jobs]

    for timer in ti.reset('cmd-xxh32sum'):
        with timer:
            for fpath in fpath_demodata:
                ub.cmd(['xxh32sum', fpath])['out'].split(' ')[0]

    for timer in ti.reset('cmd-xxh64sum'):
        with timer:
            for fpath in fpath_demodata:
                ub.cmd(['xxh64sum', fpath])['out'].split(' ')[0]

    for timer in ti.reset('cmd-xxh64sum-detatch'):
        with timer:
            # Start every process first, then collect the outputs. The
            # keyword is spelled ``detach``.
            jobs = [
                ub.cmd(['xxh64sum', fpath], detach=True)
                for fpath in fpath_demodata
            ]
            results = [
                job['proc'].communicate()[0].split(' ')[0]
                for job in jobs
            ]

    for timer in ti.reset('cmd-sha1sum'):
        with timer:
            for fpath in fpath_demodata:
                ub.cmd(['sha1sum', fpath])['out'].split(' ')[0]

    for timer in ti.reset('hash_file(hasher=sha1)'):
        with timer:
            for fpath in fpath_demodata:
                ub.hash_file(fpath, hasher='sha1')
def benchmark_nested_break():
    """
    There are several ways to do a nested break, but which one is best?

    Each candidate iterates over the pairs of two iterables and stops at
    the pair ``(20, 20)``. Candidates are timed over a grid of input sizes
    and input styles, summarized with pandas, optionally ranked with
    openskill, and plotted with seaborn (through kwplot).

    References:
        https://twitter.com/nedbat/status/1515345787563220996
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import itertools as it

    def method1_itertools(iter1, iter2):
        for i, j in it.product(iter1, iter2):
            if i == 20 and j == 20:
                break

    def method2_except(iter1, iter2):
        # The exception class is (re)defined on every call on purpose; the
        # predefined variant below measures the difference.
        class Found(Exception):
            pass
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise Found
        except Found:
            pass

    class FoundPredef(Exception):
        pass

    def method2_5_except_predef(iter1, iter2):
        try:
            for i in iter1:
                for j in iter2:
                    if i == 20 and j == 20:
                        raise FoundPredef
        except FoundPredef:
            pass

    def method3_gendef(iter1, iter2):
        def genfunc():
            for i in iter1:
                for j in iter2:
                    yield i, j
        for i, j in genfunc():
            if i == 20 and j == 20:
                break

    def method4_genexp(iter1, iter2):
        genexpr = ((i, j) for i in iter1 for j in iter2)
        for i, j in genexpr:
            if i == 20 and j == 20:
                break

    # Explicit name -> callable table for the methods listed in the basis.
    method_lut = {
        'method1_itertools': method1_itertools,
        'method2_except': method2_except,
        'method2_5_except_predef': method2_5_except_predef,
        'method3_gendef': method3_gendef,
        'method4_genexp': method4_genexp,
    }

    # Change params here to modify number of trials
    ti = timerit.Timerit(1000, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    import numpy as np
    basis = {
        'method': ['method1_itertools', 'method2_except',
                   'method2_5_except_predef', 'method3_gendef',
                   'method4_genexp'],
        # 'n1': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        # 'n2': np.logspace(1, np.log2(100), 30, base=2).astype(int),
        'size': np.logspace(1, np.log2(10000), 30, base=2).astype(int),
        'input_style': ['range', 'list', 'customized_iter'],
        # 'param_name': [param values],
    }
    xlabel = 'size'
    xinput_labels = ['n1', 'n2', 'size']

    # Set these to param labels that directly transfer to method kwargs
    kw_labels = []
    # Set these to empty lists if they are not used
    group_labels = {
        'style': ['input_style'],
        'size': [],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel} - xinput_labels) -
        set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    def make_input(params):
        """
        Build the ``iter1`` / ``iter2`` kwargs for one parameterization.

        Raises:
            KeyError: if ``params['input_style']`` is not recognized.
        """
        # n1 = params['n1']
        # n2 = params['n2']
        size = params['size']
        n1 = int(np.sqrt(size))
        n2 = int(np.sqrt(size))
        if params['input_style'] == 'list':
            iter1 = list(range(n1))
            iter2 = list(range(n2))
        elif params['input_style'] == 'range':
            iter1 = range(n1)
            iter2 = range(n2)
        elif params['input_style'] == 'customized_iter':
            import random

            def rando1():
                rng1 = random.Random(0)
                for _ in range(n1):
                    yield rng1.randint(0, n2)

            def rando2():
                rng2 = random.Random(1)
                for _ in range(n2):
                    yield rng2.randint(0, n2)

            # NOTE: these are single-pass generators. In the explicitly
            # nested-loop methods ``iter2`` is exhausted after the first
            # outer iteration, whereas ``itertools.product`` materializes
            # its inputs first, so the methods do not traverse the same
            # pairs for this input style.
            iter1 = rando1()
            iter2 = rando2()
        else:
            raise KeyError
        return {'iter1': iter1, 'iter2': iter2}

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        # size = params['n1'] * params['n2']
        # params['size'] = size
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)

        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # Inputs are rebuilt for every trial because generator inputs
            # are consumed by the previous trial.
            kwargs.update(make_input(params))
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # chunk_iter = ub.chunks(ti.times, ti.bestof)
            # times = list(map(min, chunk_iter))  # TODO: timerit method for this
            times = ti.robust_times()
            for time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename(
            {'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename(
            {'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
    else:
        stats_data = data

    USE_OPENSKILL = 1
    if USE_OPENSKILL:
        # Lets try a real ranking method
        # https://github.com/OpenDebates/openskill.py
        import openskill
        method_ratings = {m: openskill.Rating() for m in basis['method']}

    # Everything that is not a method / statistic / grouping column
    # identifies one parameter setting.
    other_keys = sorted(
        set(stats_data.columns) -
        {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'})
    for params, variants in stats_data.groupby(other_keys):
        variants = variants.sort_values('mean')
        ranking = variants['method'].reset_index(drop=True)

        # Speedup of each method relative to the slowest one in the group.
        mean_speedup = variants['mean'].max() / variants['mean']
        stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup
        min_speedup = variants['min'].max() / variants['min']
        stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup

        if USE_OPENSKILL:
            # The idea is that each setting of parameters is a game, and
            # each "method" is a player. We rank the players by which is
            # fastest, and update their ranking according to the Weng-Lin
            # Bayes ranking model. This does not take the fact that some
            # "games" (i.e. parameter settings) are more important than
            # others, but it should be fairly robust on average.
            old_ratings = [[r] for r in ub.take(method_ratings, ranking)]
            new_values = openskill.rate(old_ratings)  # Not inplace
            new_ratings = [openskill.Rating(*new[0]) for new in new_values]
            method_ratings.update(ub.dzip(ranking, new_ratings))

    print('Statistics:')
    print(stats_data)

    if USE_OPENSKILL:
        from openskill import predict_win
        win_prob = predict_win([[r] for r in method_ratings.values()])
        skill_agg = pd.Series(
            ub.dzip(method_ratings.keys(), win_prob)
        ).sort_values(ascending=False)
        print('method_ratings = {}'.format(ub.repr2(method_ratings, nl=1)))
        print('Aggregated Rankings =\n{}'.format(skill_agg))

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax,
                     **plotkw)
        ax.set_title(f'Benchmark Nested Breaks: #Trials {ti.num}, bestof {ti.bestof}')
        ax.set_xlabel(f'{xlabel}')
        ax.set_ylabel('Time')
        ax.set_xscale('log')
        ax.set_yscale('log')

        try:
            __IPYTHON__
        except NameError:
            # Not running under IPython: block so the figure stays open.
            plt.show()