def test_custom_metric():
    """Check validation and use of user-supplied metrics in ``Benchmark``.

    A metric given as a non-callable value must raise ``ValueError``; a
    callable metric is invoked with the delayed object and its result is
    reported under the keyword name it was registered with.
    """
    # A boolean is only accepted for built-in metrics, not for unknown names.
    with pytest.raises(ValueError) as excinfo:
        Benchmark(other_timer=True)(delayed(sleep)(0.1))
    message = str(excinfo.value)
    assert 'other_timer=True is not a callable' in message

    def custom_metric(obj):
        return sum(obj.compute())

    res = Benchmark(custom_metric=custom_metric)(delayed(range)(3))
    # sum(range(3)) == 0 + 1 + 2
    assert res == {'custom_metric': 3}
def test_untaged_sequence():
    """Check that sequences of delayed objects need distinct tags.

    Both an entirely untagged sequence and a sequence whose elements share
    the same tags are expected to raise ``ValueError``.
    """
    # Case 1: no tags at all.
    with pytest.raises(ValueError) as excinfo:
        timeit(delayed(sleep)(0.1) for _ in range(2))
    message = str(excinfo.value)
    assert "please provide the tag parameter" in message

    # Case 2: two elements carrying identical tags.
    with pytest.raises(ValueError) as excinfo:
        timeit([delayed(sleep, tags={'a': 1})(0.1) for _ in range(2)])
    message = str(excinfo.value)
    assert "but only 1 unique tags were found" in message
def test_progress_bar(capsys):
    """Check that the progress bar is silent by default and shown on request.

    Parameters
    ----------
    capsys : pytest fixture
        Used to capture everything written to stdout and stderr.
    """

    def tagged_sleeps():
        # A fresh generator is needed for each timeit call.
        return (delayed(sleep, tags={'N': idx})(0.1) for idx in range(2))

    # Without the progress_bar argument nothing must be printed.
    timeit(tagged_sleeps(), repeat=1)
    captured = "".join(capsys.readouterr())
    assert not captured

    # With a very small progress_bar value the bar is expected to be drawn.
    timeit(tagged_sleeps(), progress_bar=1e-3, repeat=1)
    captured = "".join(capsys.readouterr())
    assert captured
    assert '100%' in captured
    assert '2/2' in captured
def benchmark_cases(X, y):
    """Yield delayed ``fit`` calls for a grid of sample sizes.

    Four sample sizes, logarithmically spaced between ``n_samples / 1e3``
    and ``n_samples``, are combined with every entry of the module-level
    ``early_stopping`` iterable.

    Parameters
    ----------
    X, y
        Training data and targets; the first ``N`` rows are used per case.

    Yields
    ------
    A delayed ``clf.fit`` call tagged with ``N`` and ``early_stopping``.
    """
    sample_sizes = np.logspace(
        np.log10(n_samples / 1e3), np.log10(n_samples), 4
    ).astype('int')

    for N in sample_sizes:
        for es in early_stopping:
            # NOTE(review): ``es`` is only recorded in the tags here; it is
            # not passed to the estimator in this block. Confirm whether
            # ``options`` is expected to carry the early-stopping setting.
            clf = HistGradientBoostingClassifier(**options)
            fit = delayed(clf.fit, tags=OrderedDict(N=N, early_stopping=es))
            yield fit(X[:N, :], y[:N])
def benchmark_cases(X, y):
    """Yield delayed ``LogisticRegression.fit`` calls per size and solver.

    Four sample sizes, logarithmically spaced between ``n_samples / 1e3``
    and ``n_samples``, are combined with every entry of the module-level
    ``solvers`` iterable.

    Parameters
    ----------
    X, y
        Training data and targets; the first ``N`` rows are used per case.

    Yields
    ------
    A delayed ``clf.fit`` call tagged with ``N`` and ``solver``.
    """
    sample_sizes = np.logspace(
        np.log10(n_samples / 1e3), np.log10(n_samples), 4
    ).astype('int')

    for N in sample_sizes:
        for solver in solvers:
            clf = LogisticRegression(solver=solver, **options)
            fit = delayed(clf.fit, tags=OrderedDict(N=N, solver=solver))
            yield fit(X[:N, :], y[:N])
def test_dataframe_conversion(repeat, aggregate):
    """Check the layout of the DataFrame returned by ``Benchmark``.

    Parameters
    ----------
    repeat
        Number of repetitions passed to ``Benchmark``.
    aggregate
        Aggregation specification passed to ``Benchmark``; a falsy value
        selects the non-aggregated branch of the checks below.
    """
    # The whole test is skipped when pandas is not installed.
    pd = pytest.importorskip('pandas')
    N = 2
    metrics = ['peak_memory', 'wall_time']
    bench = Benchmark(wall_time=True, peak_memory=True, repeat=repeat, aggregate=aggregate)
    res = bench(delayed(sleep, tags={'idx': idx})(0.04) for idx in range(N))
    assert isinstance(res, pd.DataFrame)
    if aggregate:
        # Aggregated output: one row per tagged case.
        assert len(res) == N
        assert res.index.names == ['idx']
        if repeat > 1:
            # Columns are (metric, aggregation) pairs.
            assert isinstance(res.columns, pd.MultiIndex)
            assert list(res.columns.levels[0]) == metrics
            assert list(res.columns.levels[1]) == aggregate
        else:
            assert isinstance(res.columns, pd.Index)
    else:
        # Raw output: one row per (case, run).
        assert len(res) == N * repeat
        if repeat > 1:
            assert res.index.names == ['idx', 'runid']
        else:
            assert res.index.names == ['idx']
        assert isinstance(res.columns, pd.Index)
        assert list(res.columns) == metrics
def test_get_args_kwargs():
    """Check that a delayed call exposes the arguments it was given."""

    def func(pos_arg, key_arg=None):
        pass

    delayed_obj = delayed(func)('arg', key_arg='kwarg')

    first_args = delayed_obj.get_args()[0]
    assert first_args == 'arg'

    first_kwargs = delayed_obj.get_kwargs()[0]
    assert first_kwargs == {'key_arg': 'kwarg'}
def benchmark_cases():
    """Yield delayed ``LogisticRegression(...).fit`` calls per size and solver.

    Five sample sizes, logarithmically spaced between 100 and the
    module-level ``n_samples``, are combined with five solver names. Data
    come from the module-level ``X`` and ``y``.

    Yields
    ------
    A delayed ``fit`` call on a delayed estimator tagged with ``N`` and
    ``solver``.
    """
    sample_sizes = np.logspace(
        np.log10(100), np.log10(n_samples), 5
    ).astype('int')
    solver_names = ('newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga')

    for N in sample_sizes:
        for solver in solver_names:
            estimator = delayed(
                LogisticRegression, tags=OrderedDict(N=N, solver=solver)
            )
            model = estimator(solver=solver, random_state=rng)
            yield model.fit(X[:N], y[:N])
def test_timeit_sequence(repeat):
    """Check ``timeit`` on a tagged sequence with DataFrame output disabled.

    Parameters
    ----------
    repeat
        Number of repetitions passed to ``timeit``.
    """
    cases = (delayed(sleep, tags={'idx': idx})(0.1) for idx in range(2))
    res = timeit(cases, repeat=repeat, to_dataframe=False)

    assert isinstance(res, list)
    for record in res:
        assert 'wall_time' in record
        assert record['wall_time'] > 0
def test_set_env():
    """Check that ``env`` is applied only while the delayed call computes."""

    def func():
        return os.environ.get('NEURTU_TEST')

    # The variable is unset before the delayed call ...
    assert func() is None
    # ... visible while it runs ...
    with_env = delayed(func, env={'NEURTU_TEST': 'true'})
    assert with_env().compute() == 'true'
    # ... and unset again afterwards.
    assert func() is None
def test_repeat():
    """Check the result of a repeated, aggregated ``timeit`` run.

    The expected shape depends on whether pandas can be imported.
    """
    agg = ('mean', )
    res = timeit(delayed(sleep)(0), repeat=2, aggregate=agg)

    pd = import_or_none('pandas')
    if pd is not None:
        assert list(res.columns) == ['wall_time']
        assert list(res.index) == list(agg)
    else:
        assert len(res) == 2
def test_memit_array_allocation():
    """Check that ``memit`` reports the size of a large array allocation.

    An ``N x N`` array of ones is allocated and the measured peak memory
    must match its size (converted with ``1024**2``) within 5 %.
    """
    np = pytest.importorskip('numpy')

    N = 5000
    # Number of bytes used by a single element of ``np.ones``.
    double_size = np.ones(1).nbytes
    expected = N**2 * double_size / 1024**2

    def allocate_array():
        X = np.ones((N, N))
        sleep(0.1)
        X[:] += 1

    res = memit(delayed(allocate_array)())
    assert res['peak_memory'] == approx(expected, rel=0.05)
def test_multiple_metrics(repeat):
    """Check that several metrics can be collected in one benchmark run.

    Parameters
    ----------
    repeat
        Number of repetitions passed to ``Benchmark``. With ``repeat == 1``
        a single dict is expected, otherwise a list with one dict per run.
    """
    bench = Benchmark(wall_time=True, peak_memory=True,
                      to_dataframe=False, repeat=repeat)
    res = bench(delayed(sleep)(0))

    if repeat == 1:
        assert isinstance(res, dict)
    else:
        assert isinstance(res, list)
        # This was a bare comparison whose result was discarded, so the
        # number of runs was never actually verified.
        assert len(res) == repeat
        assert isinstance(res[0], dict)
        # Only the first run is inspected below.
        res = res[0]

    for metric in ['wall_time', 'peak_memory']:
        assert metric in res
        assert res[metric] >= 0
def test_timeit_overhead():
    """Check that the measured wall time of a sleep is close to its duration.

    The accepted absolute error depends on the platform.
    """
    dt = 0.2
    res = timeit(delayed(sleep)(dt))

    platform_precision = {
        # precision of time.time on windows is 16 ms
        'win32': 25e-3,
        # for some reason on OS X time.sleep appears to be quite inaccurate
        'darwin': 80e-3,
    }
    timer_precision = platform_precision.get(sys.platform, 5e-3)

    assert res['wall_time'] == approx(dt, abs=timer_precision)
def benchmark_sparse():
    """Yield delayed ``randomized_svd`` calls on sparse matrices.

    The grid covers feature counts, sample counts, densities, numbers of
    components and the ``preconditioner`` setting. Matrices are built with
    ``make_sparse`` once per (n_features, n_samples, density) combination.

    Yields
    ------
    A delayed ``randomized_svd`` call tagged with the case parameters.
    """
    for n_features in (10000, 100000):
        for n_samples in (5000, 20000, 100000):
            for density in (0.01, 0.0001):
                # The higher density is skipped for the widest matrices.
                if n_features > 50000 and density == 0.01:
                    continue

                X = make_sparse(n_samples, n_features, density)

                for n_components in (2, 20, 100):
                    for preconditioner in (None, 'lobpcg'):
                        tags = dict(
                            n_components=n_components,
                            n_samples=n_samples,
                            n_features=n_features,
                            nnz=X.nnz,
                            density=density,
                            # Stringified so that ``None`` is a usable tag.
                            preconditioner=str(preconditioner),
                        )
                        task = neurtu.delayed(randomized_svd, tags=tags)
                        yield task(X, n_components=n_components,
                                   preconditioner=preconditioner)
def benchmark_dense():
    """Yield delayed ``randomized_svd`` calls on dense random matrices.

    The number of samples is derived from ``n_features * ratio``. Cases with
    more than ``10000 * 100000`` elements, or with at least as many
    components as features, are skipped.

    Yields
    ------
    A delayed ``randomized_svd`` call tagged with the case parameters.
    """
    max_elements = 10000 * 100000

    for ratio in (10, 100, 1000, 2500, 5000, 7500, 10000):
        for n_features in (50, 500, 1000):
            n_samples = int(n_features * ratio)
            if n_features * n_samples > max_elements:
                continue

            # A fixed seed is used for every matrix.
            rng = np.random.RandomState(42)
            X = rng.randn(n_samples, n_features)

            for n_components in (2, 10, 25, 50, 100):
                if n_components >= n_features:
                    continue
                for preconditioner in (None, 'lobpcg'):
                    tags = dict(
                        n_components=n_components,
                        n_samples=n_samples,
                        n_features=n_features,
                        # Stringified so that ``None`` is a usable tag.
                        preconditioner=str(preconditioner),
                    )
                    task = neurtu.delayed(randomized_svd, tags=tags)
                    yield task(X, n_components=n_components,
                               preconditioner=preconditioner)
def main():
    """Yield one delayed call per algorithm, each tagged under the key "al".

    The first case calls ``main`` on ``knn``; the remaining cases call
    ``predict`` on ``svm``, ``knn1`` and ``decisionTree``.
    """
    yield neurtu.delayed(knn, tags={"al": "knn1"}).main(Xt, Yt, "path")

    # The remaining cases differ only in the wrapped object and the tag.
    predictors = (
        (svm, "SVM"),
        (knn1, "KNN"),
        (decisionTree, "tree"),
    )
    for algorithm, label in predictors:
        yield neurtu.delayed(algorithm, tags={"al": label}).predict(X, Y)
def test_memit_overhead():
    """Check that ``memit`` reports almost no memory for a plain sleep."""
    measurement = memit(delayed(sleep)(0.1))
    assert isinstance(measurement, dict)

    # measurement error is less than 1.0 MB
    peak = measurement['peak_memory']
    assert peak < 1.0
def test_wall_user_time():
    """Check that ``timeit`` reports ``cpu_time`` when that timer is chosen.

    The test is skipped when the ``resource`` module cannot be imported.
    """
    pytest.importorskip('resource')

    metric = 'cpu_time'
    res = timeit(delayed(sleep)(0), timer=metric)
    assert metric in res
"""
Time complexity of numpy.sort
=============================

In this example we look into the time complexity of :func:`numpy.sort`
for several array sizes and sorting algorithms.

"""

import numpy as np
from neurtu import timeit, delayed

rng = np.random.RandomState(42)

# Array sizes (logarithmically spaced) and sorting algorithms to compare.
sizes = np.logspace(2, 5, num=5).astype('int')
kinds = ["quicksort", "mergesort", "heapsort"]

# One delayed np.sort call per (size, algorithm) pair, on a fresh random array.
cases = (
    delayed(np.sort, tags={'N': N, 'kind': kind})(rng.rand(N), kind=kind)
    for N in sizes
    for kind in kinds
)

df = timeit(cases)

print(df.to_string())

##############################################################################
#
# We can use the pandas plotting API (that requires matplotlib)

ax = df.wall_time.unstack().plot(marker='o')
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_ylabel('Wall time (s)')
ax.set_title('Time complexity of numpy.sort')
def test_benchmark_env():
    """Check that variables passed via ``env`` appear in the ``timeit`` result."""
    env_name, env_value = 'NEURTU_TEST', 'true'

    res = timeit(delayed(sleep, env={env_name: env_value})(0))

    assert env_name in res
    assert res[env_name] == env_value