def test_custom_metric():
    """Custom metrics must be callables and are reported under their keyword.

    A non-callable value passed as an extra metric has to raise a
    ``ValueError``; a callable metric is applied to the delayed object and
    its return value is stored under the keyword used to register it.
    """
    # Non-callable metric: the error may surface at construction or at call
    # time, so both steps stay inside the ``raises`` context.
    with pytest.raises(ValueError) as err:
        runner = Benchmark(other_timer=True)
        runner(delayed(sleep)(0.1))
    message = str(err.value)
    assert 'other_timer=True is not a callable' in message

    def custom_metric(obj):
        # Evaluate the delayed object and add up what it produced.
        return sum(obj.compute())

    benchmark = Benchmark(custom_metric=custom_metric)
    outcome = benchmark(delayed(range)(3))
    # range(3) -> 0 + 1 + 2
    assert outcome == {'custom_metric': 3}
def test_dataframe_conversion(repeat, aggregate):
    """Check the index and column layout of the DataFrame a Benchmark returns.

    Two tagged cases are benchmarked for wall time and peak memory. The
    expected layout depends on whether results are aggregated and on the
    number of repetitions. The test is skipped when pandas is unavailable.
    """
    pd = pytest.importorskip('pandas')

    n_cases = 2
    expected_metrics = ['peak_memory', 'wall_time']

    benchmark = Benchmark(wall_time=True, peak_memory=True,
                          repeat=repeat, aggregate=aggregate)
    cases = (delayed(sleep, tags={'idx': idx})(0.04)
             for idx in range(n_cases))
    frame = benchmark(cases)

    assert isinstance(frame, pd.DataFrame)

    if not aggregate:
        # Raw results: one row per (case, run); the run id only becomes an
        # index level when there is more than one repetition.
        assert len(frame) == n_cases * repeat
        expected_index = ['idx', 'runid'] if repeat > 1 else ['idx']
        assert frame.index.names == expected_index
        assert isinstance(frame.columns, pd.Index)
        assert list(frame.columns) == expected_metrics
        return

    # Aggregated results: one row per case.
    assert len(frame) == n_cases
    assert frame.index.names == ['idx']

    if repeat <= 1:
        assert isinstance(frame.columns, pd.Index)
        return

    # Several repetitions: columns are (metric, aggregation) pairs.
    assert isinstance(frame.columns, pd.MultiIndex)
    assert list(frame.columns.levels[0]) == expected_metrics
    assert list(frame.columns.levels[1]) == aggregate
def test_multiple_metrics(repeat):
    """Check the raw (non-DataFrame) output when several metrics are enabled.

    With ``to_dataframe=False`` a single repetition must yield one dict and
    several repetitions must yield a list of dicts, one per repetition. Every
    result dict has to expose non-negative ``wall_time`` and ``peak_memory``
    entries.
    """
    bench = Benchmark(wall_time=True, peak_memory=True,
                      to_dataframe=False, repeat=repeat)
    res = bench(delayed(sleep)(0))

    if repeat == 1:
        assert isinstance(res, dict)
    else:
        assert isinstance(res, list)
        # The length check must be an assertion; a bare comparison is
        # evaluated and silently discarded.
        assert len(res) == repeat
        assert isinstance(res[0], dict)
        # Inspect the first repetition in the metric checks below.
        res = res[0]

    for metric in ['wall_time', 'peak_memory']:
        assert metric in res
        assert res[metric] >= 0
# Tail of a benchmark-case generator whose `def` line is outside this view:
# each case is tagged with N and early_stopping, a
# HistGradientBoostingClassifier is built from `options`, and a delayed
# `clf.fit` call on the first N rows of X and y is yielded.
# The script then (1) draws a 2-class dataset with make_classification
# (random_state=42), runs Benchmark(**bench_options) over
# benchmark_cases(X, y) and stores the estimator name and n_classes under the
# "estimator" and "n_classes" keys of the result, and (2) redraws the dataset
# with 10 classes for the multiclass run.
# NOTE(review): the loops that define N, es and options are not visible
# here -- confirm the nesting before restructuring this section.
tags = OrderedDict(N=N, early_stopping=es) clf = HistGradientBoostingClassifier(**options) yield delayed(clf.fit, tags=tags)(X[:N, :], y[:N]) # 1. Binary Histogram Gradient Booster n_classes = 2 X, y = make_classification( n_samples=n_samples, n_features=n_features, n_informative=n_informative, n_classes=n_classes, random_state=42, ) bench = Benchmark(**bench_options) print("Run binary histogram gradient booster.") df_hgbt_binary = bench(benchmark_cases(X, y)) df_hgbt_binary["estimator"] = "HistGradientBoostingClassifier" df_hgbt_binary["n_classes"] = n_classes # 2. Multiclass Histogram Gradient Booster n_classes = 10 X, y = make_classification( n_samples=n_samples, n_features=n_features, n_informative=n_informative, n_classes=n_classes, random_state=42, )
# Random features and binary labels shared by every benchmark case.
X = rng.rand(n_samples, n_features)
y = rng.randint(2, size=n_samples)


def benchmark_cases():
    """Yield one delayed ``LogisticRegression.fit`` call per (size, solver).

    Training-set sizes are 5 log-spaced values between 100 and ``n_samples``
    (cast to int). Each case is tagged with its size ``N`` and ``solver`` and
    fits on the first ``N`` rows of ``X`` and ``y``.
    """
    train_sizes = np.logspace(
        np.log10(100), np.log10(n_samples), 5
    ).astype('int')
    solvers = ('newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga')

    for n_train in train_sizes:
        for solver in solvers:
            case_tags = OrderedDict(N=n_train, solver=solver)
            estimator = delayed(LogisticRegression, tags=case_tags)(
                solver=solver, random_state=rng)
            yield estimator.fit(X[:n_train], y[:n_train])


bench = Benchmark(wall_time=True, peak_memory=True)
df = bench(benchmark_cases())
print(df.tail())

##############################################################################
#
# The above section will run in approximately 1 min, and a progress bar will
# be displayed.
#
# We can use the pandas plotting API (that requires matplotlib) to visualize
# the results,

ax = df.wall_time.unstack().plot(marker='o')
ax.set_xscale('log')