Ejemplo n.º 1
0
def test_find_binning_thresholds_regular_data():
    """Check thresholds found on 1001 evenly spaced points in [0, 10].

    For each ``max_bins`` value the returned thresholds must match the
    listed cut points (``max_bins - 1`` of them).
    """
    grid = np.linspace(0, 10, 1001)

    # max_bins -> thresholds expected for that setting
    expected_by_max_bins = {
        10: [1, 2, 3, 4, 5, 6, 7, 8, 9],
        5: [2, 4, 6, 8],
    }
    for n_bins, expected in expected_by_max_bins.items():
        found = _find_binning_thresholds(grid, max_bins=n_bins)
        assert_allclose(found, expected)
Ejemplo n.º 2
0
def test_find_binning_thresholds_small_regular_data():
    """Check thresholds found on the 11 points 0, 1, ..., 10.

    With ``max_bins`` of 5 or 10 the expected thresholds are integers;
    with ``max_bins`` of 11 or 255 the expected thresholds are the ten
    half-integers 0.5, 1.5, ..., 9.5.
    """
    points = np.linspace(0, 10, 11)
    half_steps = np.arange(10) + 0.5

    # (max_bins, expected thresholds), checked in this order
    cases = (
        (5, [2, 4, 6, 8]),
        (10, [1, 2, 3, 4, 5, 6, 7, 8, 9]),
        (11, half_steps),
        (255, half_steps),
    )
    for n_bins, expected in cases:
        found = _find_binning_thresholds(points, max_bins=n_bins)
        assert_allclose(found, expected)
Ejemplo n.º 3
0
def test_find_binning_thresholds_low_n_bins():
    """Check threshold count and dtype for ``max_bins=128`` on DATA.

    The first two columns of ``DATA`` are binned independently; each
    result must hold 127 thresholds and keep ``DATA``'s dtype.
    """
    per_feature = [
        _find_binning_thresholds(DATA[:, col], max_bins=128) for col in range(2)
    ]
    for thresholds in per_feature:
        assert thresholds.shape == (127,)  # 128 - 1
        assert thresholds.dtype == DATA.dtype
Ejemplo n.º 4
0
def test_find_binning_thresholds_random_data():
    """Check thresholds for ``max_bins=255`` on the first two DATA columns.

    Each column must yield 254 thresholds with ``DATA``'s dtype. The
    thresholds at positions 64, 128 and 192 are then compared against
    reference values within an absolute tolerance.
    """
    per_feature = [
        _find_binning_thresholds(DATA[:, col], max_bins=255) for col in range(2)
    ]
    for thresholds in per_feature:
        assert thresholds.shape == (254,)  # 255 - 1
        assert thresholds.dtype == DATA.dtype

    # Positions at one quarter, one half and three quarters of the 254 thresholds.
    probe = [64, 128, 192]
    first, second = per_feature

    # NOTE(review): the reference values depend on how DATA is generated
    # elsewhere in the file -- confirm against its definition.
    assert_allclose(first[probe], np.array([-0.7, 0.0, 0.7]), atol=1e-1)
    assert_allclose(second[probe], np.array([9.99, 10.00, 10.01]), atol=1e-2)
Ejemplo n.º 5
0
def test_map_to_bins(max_bins):
    """Check that ``_map_to_bins`` fills a valid binned copy of DATA.

    The output buffer must keep DATA's shape, be ``uint8`` and
    Fortran-contiguous. In every column, the row holding the minimum must
    land in bin 0 and the row holding the maximum in bin ``max_bins - 1``.

    ``max_bins`` is supplied by the caller (presumably a pytest
    parametrization that is not visible here).
    """
    thresholds_per_feature = [
        _find_binning_thresholds(DATA[:, col], max_bins=max_bins)
        for col in range(2)
    ]
    out = np.zeros_like(DATA, dtype=X_BINNED_DTYPE, order="F")

    # The third positional argument (the last-bin index) is max_bins itself.
    _map_to_bins(DATA, thresholds_per_feature, max_bins, out)

    assert out.shape == DATA.shape
    assert out.dtype == np.uint8
    assert out.flags.f_contiguous

    rows_of_min = DATA.argmin(axis=0)
    rows_of_max = DATA.argmax(axis=0)

    # Per-column extremes must map to the first and last regular bins.
    assert all(out[row, col] == 0 for col, row in enumerate(rows_of_min))
    top_bin = max_bins - 1
    assert all(out[row, col] == top_bin for col, row in enumerate(rows_of_max))