def test_setitem3(): outcomes = (0, 1) pmf = [1 / 2, 1 / 2] d = ScalarDistribution(outcomes, pmf, sample_space=(0, 1, 2)) d[2] = 1 / 2 d.normalize() assert np.allclose(d.pmf, [1 / 3] * 3)
def test_setitem1(): pmf = [1 / 2, 1 / 2] d = ScalarDistribution(pmf) d[0] = 1 d[1] = 0 d.make_sparse() assert_equal(d.outcomes, (0,))
def test_setitem1(): pmf = [1 / 2, 1 / 2] d = ScalarDistribution(pmf) d[0] = 1 d[1] = 0 d.make_sparse() assert d.outcomes == (0, )
def test_setitem3(): outcomes = (0, 1) pmf = [1 / 2, 1 / 2] d = ScalarDistribution(outcomes, pmf, sample_space=(0, 1, 2)) d[2] = 1 / 2 d.normalize() assert_array_almost_equal(d.pmf, [1 / 3] * 3)
def test_J6(): """ Test a property of J from result 2 of the paper with base 10 """ for i in range(2, 10): d = SD([1 / i] * i) d.set_base(10) yield assert_almost_equal, J(d), (i - 1) * (np.log10(i) - np.log10(i - 1))
def test_emd1(): """ """ sd1 = ScalarDistribution([0, 1, 2], [1 / 3, 1 / 3, 1 / 3]) sd2 = ScalarDistribution([0, 1, 2], [1, 0, 0], trim=False) emd = earth_movers_distance(sd1, sd2) assert emd == pytest.approx(1.0)
def test_init9(): outcomes = [0, 1, 2] pmf = [1 / 3] * 3 d1 = ScalarDistribution(outcomes, pmf) d2 = ScalarDistribution.from_distribution(d1, base=10) d1.set_base(10) assert_true(d1.is_approx_equal(d2))
def test_mul(): d1 = uniform_scalar_distribution(range(1, 3)) d2 = ScalarDistribution([1, 2, 4], [0.25, 0.5, 0.25]) d3 = ScalarDistribution([2, 4], [0.5, 0.5]) assert (d1 * d1).is_approx_equal(d2) assert (d1 * 2).is_approx_equal(d3) assert (2 * d1).is_approx_equal(d3)
def test_LMPR_complexity5(n): """ Test that peaked Distributions have zero complexity. """ d = ScalarDistribution([1] + [0] * (n - 1)) d.make_dense() d = Distribution.from_distribution(d) assert LMPR_complexity(d) == pytest.approx(0)
def test_disequilibrium6(n): """ Test that peaked Distributions have non-zero disequilibrium. """ d = ScalarDistribution([1] + [0]*(n-1)) d.make_dense() d = Distribution.from_distribution(d) assert disequilibrium(d) >= 0
def test_LMPR_complexity5(n): """ Test that peaked Distributions have zero complexity. """ d = ScalarDistribution([1] + [0]*(n-1)) d.make_dense() d = Distribution.from_distribution(d) assert LMPR_complexity(d) == pytest.approx(0)
def test_disequilibrium6(n): """ Test that peaked Distributions have non-zero disequilibrium. """ d = ScalarDistribution([1] + [0] * (n - 1)) d.make_dense() d = Distribution.from_distribution(d) assert disequilibrium(d) >= 0
def test_disequilibrium6(): """ Test that peaked Distributions have non-zero disequilibrium. """ for n in range(2, 11): d = ScalarDistribution([1] + [0]*(n-1)) d.make_dense() d = Distribution.from_distribution(d) yield assert_greater, disequilibrium(d), 0
def test_LMPR_complexity4(): """ Test that peaked Distributions have zero complexity. """ for n in range(2, 11): d = ScalarDistribution([1] + [0]*(n-1)) d.make_dense() d = Distribution.from_distribution(d) yield assert_almost_equal, LMPR_complexity(d), 0
def test_to_string10(): # Basic d = ScalarDistribution([], sample_space=[0, 1], validate=False) s = d.to_string() s_ = """Class: ScalarDistribution Alphabet: (0, 1) Base: 2 x log p(x)""" assert s == s_
def test_to_string10(): # Basic d = ScalarDistribution([], sample_space=[0, 1], validate=False) s = d.to_string() s_ = """Class: ScalarDistribution Alphabet: (0, 1) Base: 2 x log p(x)""" assert_equal(s, s_)
def test_mss(): """ Test the construction of minimal sufficient statistics. """ d = get_gm() d1 = mss(d, [0, 1], [2, 3]) d2 = mss(d, [2, 3], [0, 1]) dist = ScalarDistribution([0, 1], [1 / 3, 2 / 3]) assert_true(dist.is_approx_equal(d1)) assert_true(dist.is_approx_equal(d2)) assert_true(d1.is_approx_equal(d2))
def test_floordiv(): d1 = uniform_scalar_distribution(range(1, 7)) d2 = ScalarDistribution([0, 1, 2, 3], [1 / 6, 1 / 3, 1 / 3, 1 / 6]) d3 = ScalarDistribution([1, 2, 3, 6], [1 / 2, 1 / 6, 1 / 6, 1 / 6]) d4 = uniform_scalar_distribution(range(1, 3)) d5 = ScalarDistribution( [0, 1, 2, 3, 4, 5, 6], [1 / 12, 1 / 4, 1 / 4, 1 / 6, 1 / 12, 1 / 12, 1 / 12]) assert (d1 // 2).is_approx_equal(d2) assert (6 // d1).is_approx_equal(d3) assert (d1 // d4).is_approx_equal(d5)
def test_init11(): outcomes = ['0', '1'] pmf = [1 / 2, 1 / 2] d = Distribution(outcomes, pmf) sd = ScalarDistribution.from_distribution(d) # Different sample space representations assert not d.is_approx_equal(sd)
def test_init11(): outcomes = ["0", "1"] pmf = [1 / 2, 1 / 2] d = Distribution(outcomes, pmf) sd = ScalarDistribution.from_distribution(d) # Different sample space representations assert_false(d.is_approx_equal(sd))
def test_to_string7(): # Basic outcomes = ['00', '01', '10', '11'] pmf = [1/4]*4 d = ScalarDistribution(outcomes, pmf) s = d.to_string() s_ = """Class: ScalarDistribution Alphabet: ('00', '01', '10', '11') Base: linear x p(x) 00 0.25 01 0.25 10 0.25 11 0.25""" assert_equal(s, s_)
def test_to_string7(): # Basic outcomes = ['00', '01', '10', '11'] pmf = [1/4]*4 d = ScalarDistribution(outcomes, pmf) s = d.to_string() s_ = """Class: ScalarDistribution Alphabet: ('00', '01', '10', '11') Base: linear x p(x) 00 0.25 01 0.25 10 0.25 11 0.25""" assert s == s_
def test_prepare_string2(): # Basic outcomes = ['00', '01', '10', '11'] pmf = [1 / 4] * 4 d = ScalarDistribution(outcomes, pmf) from dit.distribution import prepare_string assert_raises(ditException, prepare_string, d, str_outcomes=True)
def test_fanos_inequality(dist): """ H(X|Y) <= hb(P_e) + P_e log(|X| - 1) """ dist1 = SD.from_distribution(dist.marginal([0])) dist2 = SD.from_distribution(dist.marginal([1])) ce = H(dist, [0], [1]) X = len(set().union(dist1.outcomes, dist2.outcomes)) eq_dist = dist1 == dist2 P_e = eq_dist[False] if False in eq_dist else 0 hb = H(SD([P_e, 1 - P_e])) assert ce <= hb + P_e * np.log2(X - 1) + epsilon
def test_init12(): outcomes = ['0', '1'] pmf = [1 / 2, 1 / 2] d = Distribution(outcomes, pmf) sd = ScalarDistribution.from_distribution(d, base=10) d.set_base(10) # Different sample space representations assert_false(d.is_approx_equal(sd))
def test_mod(): d1 = uniform_scalar_distribution(range(1, 7)) d2 = uniform_scalar_distribution(range(2)) d3 = uniform_scalar_distribution(range(1, 3)) d4 = ScalarDistribution([0, 1], [3 / 4, 1 / 4]) assert (d1 % 2).is_approx_equal(d2) assert (5 % d3).is_approx_equal(d2) assert (d1 % d3).is_approx_equal(d4)
def test_init12(): outcomes = ['0', '1'] pmf = [1/2, 1/2] d = Distribution(outcomes, pmf) sd = ScalarDistribution.from_distribution(d, base=10) d.set_base(10) # Different sample space representations assert_false(d.is_approx_equal(sd))
def test_prepare_string1(): # Basic outcomes = ['00', '01', '10', '11'] pmf = [1/4]*4 d = ScalarDistribution(outcomes, pmf) from dit.distribution import prepare_string with pytest.raises(ditException): prepare_string(d, show_mask=True)
def test_fanos_inequality(dist): """ H(X|Y) <= hb(P_e) + P_e log(|X| - 1) """ dist1 = SD.from_distribution(dist.marginal([0])) dist2 = SD.from_distribution(dist.marginal([1])) ce = H(dist, [0], [1]) X = len(set().union(dist1.outcomes, dist2.outcomes)) eq_dist = dist1 == dist2 P_e = eq_dist[False] if False in eq_dist else 0 hb = H(SD([P_e, 1-P_e])) assert ce <= hb + P_e * np.log2(X - 1) + epsilon
def test_sub(): d1 = uniform_scalar_distribution(range(3)) d2 = uniform_scalar_distribution(range(1, 4)) d3 = ScalarDistribution([-2, -1, 0, 1, 2], [1 / 9, 2 / 9, 3 / 9, 2 / 9, 1 / 9]) assert (d2 - 1).is_approx_equal(d1) assert (3 - d2).is_approx_equal(d1) assert (d1).is_approx_equal(d2 - 1) assert (d1).is_approx_equal(3 - d2) assert (d1 - d1).is_approx_equal(d3)
def test_dist_from_induced(): """ Test dist_from_induced_sigalg """ outcomes = [(0, ), (1, ), (2, )] pmf = np.array([1 / 3] * 3) d = ScalarDistribution(outcomes, pmf) sigalg = frozenset(map(frozenset, d.event_space())) d2 = dist_from_induced_sigalg(d, sigalg) assert np.allclose(pmf, d2.pmf) sigalg = [(), ((0, ), ), ((1, ), (2, )), ((0, ), (1, ), (2, ))] sigalg = frozenset(map(frozenset, sigalg)) d2 = dist_from_induced_sigalg(d, sigalg, int_outcomes=True) pmf = np.array([1 / 3, 2 / 3]) assert np.allclose(pmf, d2.pmf) d2 = dist_from_induced_sigalg(d, sigalg, int_outcomes=False) outcomes = (((0, ), ), ((1, ), (2, ))) assert outcomes == d2.outcomes
def test_add(): d1 = uniform_scalar_distribution(range(3)) d2 = uniform_scalar_distribution(range(1, 4)) d3 = ScalarDistribution([0, 1, 2, 3, 4], [1 / 9, 2 / 9, 3 / 9, 2 / 9, 1 / 9]) assert (d1 + 1).is_approx_equal(d2) assert (1 + d1).is_approx_equal(d2) assert (d2).is_approx_equal(d1 + 1) assert (d2).is_approx_equal(1 + d1) assert (d1 + d1).is_approx_equal(d3)
def test_dist_from_induced(): """ Test dist_from_induced_sigalg """ outcomes = [(0,), (1,), (2,)] pmf = np.array([1/3] * 3) d = ScalarDistribution(outcomes, pmf) sigalg = frozenset(map(frozenset, d.event_space())) d2 = dist_from_induced_sigalg(d, sigalg) assert np.allclose(pmf, d2.pmf) sigalg = [(), ((0,),), ((1,), (2,)), ((0,), (1,), (2,))] sigalg = frozenset(map(frozenset, sigalg)) d2 = dist_from_induced_sigalg(d, sigalg, int_outcomes=True) pmf = np.array([1/3, 2/3]) assert np.allclose(pmf, d2.pmf) d2 = dist_from_induced_sigalg(d, sigalg, int_outcomes=False) outcomes = (((0,),), ((1,), (2,))) assert outcomes == d2.outcomes
def test_div(): d1 = uniform_scalar_distribution([2, 4, 6]) d2 = uniform_scalar_distribution([1, 2, 3, 4, 6]) d3 = uniform_scalar_distribution([1, 2, 3]) d4 = uniform_scalar_distribution([12, 6, 4, 3, 2]) d5 = uniform_scalar_distribution([1, 2]) d6 = ScalarDistribution([1, 2, 3, 4, 6], [1 / 6, 1 / 3, 1 / 6, 1 / 6, 1 / 6]) assert (d1 / 2).is_approx_equal(d3) assert (12 / d2).is_approx_equal(d4) assert (d1 / d5).is_approx_equal(d6)
def test_init9(): outcomes = [0, 1, 2] pmf = [1 / 3] * 3 d1 = ScalarDistribution(outcomes, pmf) d2 = ScalarDistribution.from_distribution(d1, base=10) d1.set_base(10) assert d1.is_approx_equal(d2)
def dist_from_induced_sigalg(dist, sigalg, int_outcomes=True): """ Returns the distribution associated with an induced sigma algebra. The sigma algebra is induced by a random variable from a probability space defined by `dist`. Parameters ---------- dist : Distribution The distribution which defines the base sigma-algebra. sigalg : frozenset A sigma-algebra induced by a random variable from `dist`. int_outcomes : bool If `True`, then the outcomes of the induced distribution are relabeled as integers instead of the atoms of the induced sigma-algebra. Returns ------- d : ScalarDistribution The distribution of the induced sigma algebra. """ from dit import ScalarDistribution atoms = atom_set(sigalg) if int_outcomes: atoms = [sorted(atom) for atom in atoms] atoms.sort(key=quasilexico_key) pmf = [dist.event_probability(atom) for atom in atoms] if int_outcomes: outcomes = range(len(atoms)) else: # Outcomes must be sequences. outcomes = [tuple(sorted(atom)) for atom in atoms] d = ScalarDistribution(outcomes, pmf, base=dist.get_base()) return d
def test_init10(): outcomes = [] pmf = [] with pytest.raises(InvalidDistribution): ScalarDistribution(outcomes, pmf)
def test_init8(): outcomes = [0, 1, 2] pmf = [1 / 3] * 3 d1 = ScalarDistribution(outcomes, pmf) d2 = ScalarDistribution.from_distribution(d1) assert d1.is_approx_equal(d2)
def test_p2(i): """ Test some simple base cases using SD with varying bases """ d = SD([1/i]*i) d.set_base(i) assert P(d) == pytest.approx(i)
def test_J5(i): """ Test a property of J from result 2 of the paper with a log base """ d = SD([1/i]*i) d.set_base('e') assert J(d) == pytest.approx((i-1)*(np.log(i)-np.log(i-1)))
def test_init7(): outcomes = [0, 1, 2] pmf = [1 / 2, 1 / 2, 0] d = ScalarDistribution(outcomes, pmf, trim=True) assert len(d.outcomes) == 2
def test_p2(i): """ Test some simple base cases using SD with varying bases """ d = SD([1 / i] * i) d.set_base(i) assert P(d) == pytest.approx(i)
def test_J6(i): """ Test a property of J from result 2 of the paper with base 10 """ d = SD([1/i]*i) d.set_base(10) assert J(d) == pytest.approx((i-1)*(np.log10(i)-np.log10(i-1)))
def test_p2(): for i in range(2, 10): d = SD([1/i]*i) d.set_base(i) assert_almost_equal(P(d), i)
def test_p2(): """ Test some simple base cases using SD with varying bases """ for i in range(2, 10): d = SD([1 / i] * i) d.set_base(i) yield assert_almost_equal, P(d), i
def test_J8(): for i in range(3, 10): d = SD([1/i]*i) d.set_base(i) assert(J(d) < H(d))
def test_J6(): for i in range(2, 10): d = SD([1/i]*i) d.set_base(10) assert_almost_equal(J(d), (i-1)*(np.log10(i)-np.log10(i-1)))
def test_del2(): d = ScalarDistribution([1 / 2, 1 / 2]) d.make_dense() del d[1] d.normalize() assert_almost_equal(d[0], 1)
def test_has_outcome1(): d = ScalarDistribution([1, 0]) assert_true(d.has_outcome(1))
def test_has_outcome3(): d = ScalarDistribution([1, 0]) assert_false(d.has_outcome(2, null=False))
def test_J8(): """ Test a property of J from result 1 of the paper using log bases """ for i in range(3, 10): d = SD([1/i]*i) d.set_base(i) yield assert_true, J(d) < H(d)
def test_J8(i): """ Test a property of J from result 1 of the paper using log bases """ d = SD([1/i]*i) d.set_base(i) assert J(d) < H(d)
def test_is_approx_equal2(): d1 = ScalarDistribution([1 / 2, 1 / 2, 0]) d1.make_dense() d2 = ScalarDistribution([1 / 2, 0, 1 / 2]) d2.make_dense() assert_false(d1.is_approx_equal(d2))
def test_add_mul(): d1 = ScalarDistribution([1 / 3, 2 / 3]) d2 = ScalarDistribution([2 / 3, 1 / 3]) d3 = ScalarDistribution([1 / 2, 1 / 2]) d4 = 0.5 * (d1 + d2) assert_true(d3.is_approx_equal(d4))
def test_J8(): """ Test a property of J from result 1 of the paper using log bases """ for i in range(3, 10): d = SD([1 / i] * i) d.set_base(i) yield assert_true, J(d) < H(d)
def test_is_approx_equal3(): d1 = ScalarDistribution([1 / 2, 1 / 2], sample_space=(0, 1, 2)) d2 = ScalarDistribution([1 / 2, 1 / 2]) assert_false(d1.is_approx_equal(d2))
def test_H4(): """ Test entropy in base 10 """ d = SD([1 / 10] * 10) d.set_base(10) assert H(d) == pytest.approx(1.0)
def test_init8(): outcomes = [0, 1, 2] pmf = [1 / 3] * 3 d1 = ScalarDistribution(outcomes, pmf) d2 = ScalarDistribution.from_distribution(d1) assert_true(d1.is_approx_equal(d2))