Example #1
def test_custom_metric():
    with pytest.raises(ValueError) as excinfo:
        Benchmark(other_timer=True)(delayed(sleep)(0.1))
    assert 'other_timer=True is not a callable' in str(excinfo.value)

    def custom_metric(obj):
        return sum(obj.compute())

    bench = Benchmark(custom_metric=custom_metric)
    res = bench(delayed(range)(3))
    assert res == {'custom_metric': 3}
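# A minimal usage sketch of the custom-metric pattern above, outside pytest
# (Benchmark and delayed imported as in the surrounding examples).
# Assumption: any keyword argument whose value is a callable is treated as an
# extra metric and receives the delayed case, exactly like custom_metric above.
def n_elements(obj):
    return len(obj.compute())

bench = Benchmark(wall_time=True, n_elements=n_elements)
res = bench(delayed(range)(10))
print(res['n_elements'])      # 10
print(res['wall_time'] >= 0)  # True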
Example #2
def test_dataframe_conversion(repeat, aggregate):

    pd = pytest.importorskip('pandas')

    N = 2

    metrics = ['peak_memory', 'wall_time']

    bench = Benchmark(wall_time=True,
                      peak_memory=True,
                      repeat=repeat,
                      aggregate=aggregate)

    res = bench(delayed(sleep, tags={'idx': idx})(0.04) for idx in range(N))

    assert isinstance(res, pd.DataFrame)

    if aggregate:
        assert len(res) == N
        assert res.index.names == ['idx']
        if repeat > 1:
            assert isinstance(res.columns, pd.MultiIndex)
            assert list(res.columns.levels[0]) == metrics
            assert list(res.columns.levels[1]) == aggregate
        else:
            assert isinstance(res.columns, pd.Index)
    else:
        assert len(res) == N * repeat
        if repeat > 1:
            assert res.index.names == ['idx', 'runid']
        else:
            assert res.index.names == ['idx']
        assert isinstance(res.columns, pd.Index)
        assert list(res.columns) == metrics
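# A minimal sketch of the tagged-benchmark pattern tested above, run outside
# pytest (assumes pandas is installed; Benchmark, delayed and sleep imported as
# in the surrounding examples). The tags of each delayed case become the
# DataFrame index; with repeat > 1 the requested aggregates form a second
# column level.
bench = Benchmark(wall_time=True, peak_memory=True,
                  repeat=3, aggregate=['mean', 'max'])
df = bench(delayed(sleep, tags={'idx': idx})(0.04) for idx in range(2))
print(df.index.names)   # ['idx']
print(df.columns)       # MultiIndex: ('peak_memory'|'wall_time') x ('max'|'mean')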
Example #3
def test_multiple_metrics(repeat):

    bench = Benchmark(wall_time=True,
                      peak_memory=True,
                      to_dataframe=False,
                      repeat=repeat)
    res = bench(delayed(sleep)(0))

    if repeat == 1:
        assert isinstance(res, dict)
    else:
        assert isinstance(res, list)
        assert len(res) == repeat
        assert isinstance(res[0], dict)
        res = res[0]

    for metric in ['wall_time', 'peak_memory']:
        assert metric in res
        assert res[metric] >= 0
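# A minimal usage sketch of the repeated-measurement pattern tested above,
# outside pytest (Benchmark, delayed and sleep imported as in the surrounding
# examples): with repeat > 1 and to_dataframe=False, one dict of metrics is
# returned per run.
bench = Benchmark(wall_time=True, peak_memory=True, to_dataframe=False, repeat=3)
for run in bench(delayed(sleep)(0)):
    print(run['wall_time'], run['peak_memory'])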
Example #4

def benchmark_cases(X, y):
    # Assumed: the training-size grid and the early-stopping settings; they
    # mirror the logistic-regression benchmark further below.
    for N in np.logspace(np.log10(100), np.log10(n_samples), 5).astype('int'):
        for es in [True, False]:
            options = dict(early_stopping=es)
            tags = OrderedDict(N=N, early_stopping=es)
            clf = HistGradientBoostingClassifier(**options)
            yield delayed(clf.fit, tags=tags)(X[:N, :], y[:N])

# 1. Binary Histogram Gradient Booster
n_classes = 2
X, y = make_classification(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=n_informative,
    n_classes=n_classes,
    random_state=42,
)

bench = Benchmark(**bench_options)
print("Run binary histogram gradient booster.")
df_hgbt_binary = bench(benchmark_cases(X, y))
df_hgbt_binary["estimator"] = "HistGradientBoostingClassifier"
df_hgbt_binary["n_classes"] = n_classes

# 2. Multiclass Histogram Gradient Booster
n_classes = 10
X, y = make_classification(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=n_informative,
    n_classes=n_classes,
    random_state=42,
)

# The multiclass run mirrors the binary one above.
print("Run multiclass histogram gradient booster.")
df_hgbt_multiclass = bench(benchmark_cases(X, y))
df_hgbt_multiclass["estimator"] = "HistGradientBoostingClassifier"
df_hgbt_multiclass["n_classes"] = n_classes

Example #5

import numpy as np
from collections import OrderedDict

from sklearn.linear_model import LogisticRegression

# Assumption: Benchmark and delayed come from the neurtu benchmarking package.
from neurtu import Benchmark, delayed

n_samples, n_features = 50000, 100  # assumed problem size for illustration
rng = np.random.RandomState(42)

X = rng.rand(n_samples, n_features)
y = rng.randint(2, size=(n_samples,))


def benchmark_cases():
    for N in np.logspace(np.log10(100), np.log10(n_samples), 5).astype('int'):
        for solver in ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']:
            tags = OrderedDict(N=N, solver=solver)
            model = delayed(LogisticRegression, tags=tags)(
                                solver=solver, random_state=rng)

            yield model.fit(X[:N], y[:N])


bench = Benchmark(wall_time=True, peak_memory=True)
df = bench(benchmark_cases())

print(df.tail())


##############################################################################
#
# The above section runs in approximately 1 min; a progress bar is displayed
# while it runs.
#
# We can use the pandas plotting API (which requires matplotlib) to visualize
# the results:

ax = df.wall_time.unstack().plot(marker='o')
ax.set_xscale('log')
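
##############################################################################
#
# The same unstack-and-plot pattern can be applied to the other collected
# metric; a minimal sketch, assuming matplotlib is installed and peak_memory
# was recorded as above,

import matplotlib.pyplot as plt

ax = df.peak_memory.unstack().plot(marker='o')
ax.set_xscale('log')
ax.set_ylabel('peak memory')
plt.show()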