def test_entropy(use_handle):
    """Check entropy of fair and biased coin outcomes against known values."""
    handle, stream = get_handle(use_handle)

    # A fair coin is maximally uncertain: exactly one bit of entropy in base 2.
    fair = np.array([0, 1], dtype=np.int32)
    assert_almost_equal(entropy(fair, base=2., handle=handle), 1.)

    # A 9:1 biased coin is less uncertain than a fair one.
    biased = np.array([0] * 9 + [1], dtype=np.int32)
    assert_almost_equal(entropy(biased, base=2., handle=handle), 0.468995593)

    # Same distribution measured in nats (base e).
    assert_almost_equal(entropy(biased, handle=handle), 0.32508297339144826)
def test_entropy_random(n_samples, base, use_handle):
    """Compare cuML's entropy on random labels against scipy's reference.

    BUG FIX: ``sp_entropy`` was used without ever being imported, so this
    test raised ``NameError`` at runtime. The import is now guarded the same
    way as the sibling test in this file, skipping when scipy is unavailable.
    """
    if has_scipy():
        from scipy.stats import entropy as sp_entropy
    else:
        pytest.skip('Skipping test_entropy_random because Scipy is missing')

    handle, stream = get_handle(use_handle)
    clustering, _ = \
        generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples))

    # scipy's entropy consumes (unnormalized) probabilities ...
    pk = np.bincount(clustering)
    sp_S = sp_entropy(pk, base=base)

    # ... while cuML's entropy consumes the raw clustering labels.
    S = entropy(np.array(clustering, dtype=np.int32), base, handle=handle)
    assert_almost_equal(S, sp_S, decimal=2)
def test_entropy_random(n_samples, base, use_handle):
    """Verify cuML's entropy agrees with scipy's on randomly generated labels."""
    if not has_scipy():
        pytest.skip('Skipping test_entropy_random because Scipy is missing')
    from scipy.stats import entropy as sp_entropy

    handle, stream = get_handle(use_handle)
    clustering, _, _, _ = \
        generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples))

    # scipy's reference implementation takes (unnormalized) probabilities,
    # which we derive from the label counts.
    counts = np.bincount(clustering)
    expected = sp_entropy(counts, base=base)

    # cuML's implementation takes the clustering labels directly.
    observed = entropy(np.array(clustering, dtype=np.int32), base, handle=handle)
    assert_almost_equal(observed, expected, decimal=2)