def test_get_bootstrap_samples_multistat(spark_context, stack_depth=0):
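    # Half zeros, half ones: min 0, max 1, population mean 0.5.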
    data = np.concatenate([np.zeros(10000), np.ones(10000)])
    res = mabsbb.get_bootstrap_samples(
        spark_context,
        data,
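        # The stat function returns a dict of named statistics, so `res`
        # comes back as a DataFrame with one row per bootstrap sample and
        # one column per statistic. (Presumably `x` holds the data values
        # and `y` their Dirichlet bootstrap weights, so np.dot(x, y) is a
        # weighted mean.)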
        lambda x, y: {
            'min': np.min(x),
            'max': np.max(x),
            'mean': np.dot(x, y),
        },
        num_samples=2
    )

    assert res.shape == (2, 3)  # 2 bootstrap samples x 3 statistics

    assert (res['min'] == 0).all()
    assert (res['max'] == 1).all()
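    # The bootstrap means should land close to the population mean of 0.5.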
    assert res['mean'].iloc[0] == pytest.approx(np.mean(data), rel=1e-1)
    assert res['mean'].iloc[1] == pytest.approx(np.mean(data), rel=1e-1)

    # If we stuff up the seeds (e.g. duplicate them) then the samples aren't
    # independent and the two bootstrap means would come out identical.
    assert res['mean'].iloc[0] != res['mean'].iloc[1]

    if stack_depth >= 3:
        # After several re-rolls, every bootstrap mean still exactly matching
        # the population mean would be extremely unlikely by chance.
        assert (res['mean'] != np.mean(data)).any()
    elif (res['mean'] == np.mean(data)).any():
        # Re-roll the dice a few times to check that the exact match was
        # just a fluke.
        test_get_bootstrap_samples_multistat(spark_context, stack_depth + 1)


def test_get_bootstrap_samples(spark_context):
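    # With no stat function supplied, the default statistic (presumably the
    # mean) is used, so the result is a 1-d array of bootstrap means.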
    res = mabsbb.get_bootstrap_samples(
        spark_context, np.array([3., 3., 3.]), num_samples=2
    )
    assert res.shape == (2,)
    assert res[0] == pytest.approx(3.)
    assert res[1] == pytest.approx(3.)