def test_product_with_frequency_table_discrete_distribution():
    """Check ``*`` between a DiscreteDistribution and a FrequencyTable.

    Both operand orders are exercised, first with a table whose variable
    name ("Y1") does not occur in the distribution and then with a table
    whose name ("X2") is one of the distribution's names.
    """
    table = FrequencyTable({"A": 3, "B": 4, "C": 7}, name="Y1")
    joint = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])

    # No shared name, distribution on the left: "Y1" is expected last and
    # the total is the product of both totals.
    product = joint * table
    assert all(compare(product.names, ["X1", "X2", "X3", "X4", "Y1"]))
    assert product.total == (joint.total * table.total)

    # The table contributes the factor 4 (its count for "B"); the factor 6
    # is presumably the count of ("a", "y", 2, 33) in sample_1.
    row = ("a", "y", 2, 33, "B")
    assert product.frequency(row) == 6 * 4
    assert product.probability(row) == 24 / 1708
    assert product[row] == 24

    # No shared name, table on the left: "Y1" is expected first.
    product = table * joint
    assert all(compare(product.names, ["Y1", "X1", "X2", "X3", "X4"]))
    assert product.total == (joint.total * table.total)

    row = ("B", "a", "y", 2, 33)
    assert product.frequency(row) == 6 * 4
    assert product.probability(row) == 24 / 1708
    assert product[row] == 24

    # Shared name "X2": the variable is not duplicated in the result.
    table = FrequencyTable({"x": 3, "y": 4}, name="X2")
    joint = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])
    # 3 and 4 are the table counts for "x" and "y"; 52 and 70 are
    # presumably the matching X2 totals in sample_1.
    shared_total = 52 * 3 + 70 * 4

    product = joint * table
    assert all(compare(product.names, ["X1", "X2", "X3", "X4"]))
    assert product.total == shared_total

    product = table * joint
    assert all(compare(product.names, ["X2", "X1", "X3", "X4"]))
    assert product.total == shared_total
Beispiel #2
0
def test_entropy():
    """Check ``entropy`` for FrequencyTable and DiscreteDistribution inputs.

    Covers a balanced binary sample, a 60/40 sample, a sample in which one
    level has a zero count, and a distribution keyed by two-element tuples.
    """
    containers = (FrequencyTable, DiscreteDistribution)

    # Four 1s and four 2s: the expected entropy is log2(2).
    balanced = [1, 1, 2, 2, 1, 1, 2, 2]
    for container in containers:
        assert entropy(container(balanced)) == np.log2(2)

    # Six "Dog" and four "Cat": the expected entropy is about 0.970950.
    pets = ["Dog"] * 6 + ["Cat"] * 4
    for container in containers:
        assert entropy(container(pets)) == approx(0.970950)

    # Every observation falls on one level: the expected entropy is zero.
    one_sided = {"Dog": 10, "Cat": 0}
    for container in containers:
        assert entropy(container(one_sided)) == 0

    # Keys that are tuples (only the DiscreteDistribution is checked here).
    pairs = {(1, 2): 150, (1, 3): 150, (2, 2): 300, (2, 3): 400}
    assert entropy(DiscreteDistribution(pairs)) == approx(1.8709505945)
def test_marginals_operator_discrete_distribution():
    """Check the ``<<`` operator on a four-variable distribution.

    For each right-hand operand the total must be unchanged and the keys of
    the result must be the levels of the variables that were not listed.
    The same checks run with default names (X1..X4) and custom names.
    """
    # Sixteen keys with counts 1..16; the third component varies fastest,
    # then the fourth, then the second, then the first.
    samples = {}
    for first in ("a", "b"):
        for second in ("x", "y"):
            for fourth in (33, 44):
                for third in (1, 2):
                    samples[(first, second, third, fourth)] = len(samples) + 1

    pair_keys = [("a", 33), ("a", 44), ("b", 33), ("b", 44)]

    # Default variable names.
    disc_dist = DiscreteDistribution(samples)
    for listed in ("X2", ("X2", "X3"), ("X2", "X3", "X4")):
        assert (disc_dist << listed).total == disc_dist.total

    expected_keys = [
        (("X1", "X2", "X4"), [1, 2]),
        (("X1", "X2", "X3"), [33, 44]),
        (("X2", "X3", "X4"), ["a", "b"]),
        (("X2", "X3"), pair_keys),
    ]
    for listed, keys in expected_keys:
        assert all(compare((disc_dist << listed).keys_as_list(), keys))

    # Custom variable names.
    disc_dist = DiscreteDistribution(samples,
                                     names=["Age", "Sex", "Education", "City"])
    # The first operand is a plain string, not a one-element tuple.
    for listed in ("Age", ("Sex", "Education"), ("Sex", "Education", "City")):
        assert (disc_dist << listed).total == disc_dist.total

    expected_keys = [
        (("Age", "Sex", "City"), [1, 2]),
        (("Age", "Sex", "Education"), [33, 44]),
        (("Sex", "Education", "City"), ["a", "b"]),
        (("Sex", "Education"), pair_keys),
    ]
    for listed, keys in expected_keys:
        assert all(compare((disc_dist << listed).keys_as_list(), keys))
Beispiel #4
0
def test_iterable_samples_discrete_distribution():
    """Check that one-shot iterables are accepted as samples.

    A string iterator and a generator over the same text are each passed to
    DiscreteDistribution; the total must equal the number of characters.
    """
    text = """It is a long established fact that a reader will be
     distracted by the readable content of a page when looking at its
     layout. The point of using Lorem Ipsum is that it has a more-or-less
     normal distribution of letters, as opposed to using 'Content here,
     content here', making it look like readable English."""

    from_iterator = DiscreteDistribution(iter(text))
    assert from_iterator.total == len(text)

    from_generator = DiscreteDistribution(c for c in text)
    assert from_generator.total == len(text)
def test_conditional_discrete_distribution():
    """Check ``condition_on("X2")`` on a four-variable distribution.

    Verifies the name of the conditioning variable, the names of the
    per-level distributions, and a selection of conditional frequencies and
    probabilities for the levels "x" and "y".
    """
    # Sixteen keys with counts 1..16; the third component varies fastest,
    # then the fourth, then the second, then the first.
    samples = {}
    for first in ("a", "b"):
        for second in ("x", "y"):
            for fourth in (33, 44):
                for third in (1, 2):
                    samples[(first, second, third, fourth)] = len(samples) + 1

    disc_dist = DiscreteDistribution(samples)
    conditional = disc_dist.condition_on("X2")
    assert all(compare(conditional.conditional_rvs.names, ["X2"]))
    for level in ("x", "y"):
        assert all(
            compare(conditional.distributions[level].names,
                    ["X1", "X3", "X4"]))

    # (key of the remaining variables, X2 level, expected count)
    cases = [
        (("a", 1, 33), "x", 1),
        (("a", 1, 33), "y", 5),
        (("a", 1, 44), "x", 3),
        (("a", 1, 44), "y", 7),
        (("b", 1, 33), "x", 9),
        (("b", 1, 33), "y", 13),
        (("b", 1, 44), "x", 11),
        (("b", 1, 44), "y", 15),
        (("b", 2, 44), "x", 12),
        (("b", 2, 33), "y", 14),
    ]
    # Sum of the sample counts whose second component is "x" / "y".
    level_total = {"x": 52, "y": 84}

    for key, level, count in cases:
        assert conditional.frequency(key, level) == count

    for key, level, count in cases:
        assert conditional.probability(key, level) == count / level_total[level]
Beispiel #6
0
def test_levels_is_numeric_discrete_distribution():
    """Check the ``is_numeric`` flag of each random variable.

    The keys have two string components followed by two integer components
    and one float component; the flag is read both by position and by the
    default names X1..X5.
    """
    samples = {
        ("a", "x", 1, 33, 1.5): 1,
        ("a", "x", 2, 33, 1.5): 2,
        ("a", "x", 1, 44, 1.5): 3,
        ("a", "x", 2, 44, 1.5): 4,
        ("a", "y", 1, 33, 1.5): 5,
        ("a", "y", 2, 33, 1.5): 6,
        ("a", "y", 1, 44, 2.5): 7,
        ("a", "y", 2, 44, 2.5): 8,
        ("b", "x", 1, 33, 2.5): 9,
        ("b", "x", 2, 33, 2.5): 10,
        ("b", "x", 1, 44, 2.5): 11,
        ("b", "x", 2, 44, 2.5): 12,
        ("b", "y", 1, 33, 3.5): 13,
        ("b", "y", 2, 33, 3.5): 14,
        ("b", "y", 1, 44, 3.5): 15,
        ("b", "y", 2, 44, 3.5): 16,
    }
    rvs = DiscreteDistribution(samples).rvs

    # Lookup by position.
    for position in (0, 1):
        assert not rvs[position].is_numeric
    for position in (2, 3, 4):
        assert rvs[position].is_numeric

    # Lookup by name.
    for name in ("X1", "X2"):
        assert not rvs[name].is_numeric
    for name in ("X3", "X4", "X5"):
        assert rvs[name].is_numeric
Beispiel #7
0
def test_numpy_array_discrete_distribution():
    """Check ``DiscreteDistribution.from_np_array``.

    A set and a flat list must be rejected with ValueError; a single-column
    2-D array of 100 labels must yield a distribution keyed by 1-tuples.
    """
    # Neither a set nor a flat list is accepted.
    for rejected in ({1, 2, 3}, [1, 2, 3]):
        with pytest.raises(ValueError):
            DiscreteDistribution.from_np_array(rejected)

    # 24 + 48 + 4 + 7 + 17 = 100 labels, reshaped into one column.
    flat = np.r_[["A"] * 24, ["B"] * 48, ["C"] * 4, ["D"] * 7, ["E"] * 17]
    column = flat.reshape((flat.shape[0], 1))
    dist = DiscreteDistribution.from_np_array(column)
    assert dist.total == 100

    # Rows are looked up as tuples, since a list is not hashable.
    single_a = ("A", )
    assert dist.probability(single_a) == 0.24
    assert dist.prob(X1=single_a) == 0.24
def test_product_with_two_common_vars_discrete_distribution():
    """Check ``*`` for two distributions that share the names X2 and X3.

    Verifies the merged names, the total, one row present on both sides,
    and two rows expected to be zero because one side has no match.
    """
    left = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])
    right = DiscreteDistribution(sample_2, names=["X3", "X5", "X6", "X2"])

    product = left * right
    assert all(compare(product.names, ["X1", "X2", "X3", "X4", "X5", "X6"]))
    assert product.total == 24 * 36 + 28 * 100 + 40 * 164 + 30 * 51

    # A row expected to have the frequency 6 * 25.
    matched = ("a", "y", 2, 33, "high", "under")
    assert product.frequency(matched) == 6 * 25
    assert product.probability(matched) == 150 / 11754
    assert product[matched] == 150

    unmatched_rows = [
        # the right operand does not have the common part
        ("a", "y", 2, 33, "low", "under"),
        # the left operand does not have the common part
        ("b", "y", 2, 33, "high", "under"),
    ]
    for row in unmatched_rows:
        assert product.frequency(row) == 0
        assert product.probability(row) == 0
        assert product[row] == 0
def test_product_with_no_common_vars_discrete_distribution():
    """Check ``*`` for two distributions with disjoint variable names.

    The result must list the left names followed by the right names and its
    total must be the product of both totals; a few rows are spot-checked.
    """
    left = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])
    right = DiscreteDistribution(sample_2, names=["Y1", "Y2", "Y3", "Y4"])

    product = left * right
    expected_names = ["X1", "X2", "X3", "X4", "Y1", "Y2", "Y3", "Y4"]
    assert all(compare(product.names, expected_names))
    assert product.total == (left.total * right.total)

    # Note: the frequency check uses the "under" row while the probability
    # and item checks use the "normal" row.
    assert product.frequency(("a", "x", 1, 33, 2, "high", "under", "x")) == 9
    normal_row = ("a", "x", 1, 33, 2, "high", "normal", "x")
    assert product.probability(normal_row) == 10 / 42822
    assert product[normal_row] == 10

    over_row = ("b", "x", 1, 44, 1, "low", "over", "y")
    assert product.frequency(over_row) == 253
    assert product.probability(over_row) == 253 / 42822
    assert product[over_row] == 253
def test_product_with_one_common_var_discrete_distribution():
    """Check ``*`` for two distributions that share only the name X3.

    Verifies the merged names, the total, one row with a non-zero frequency,
    and two rows expected to be zero because one side has no match.
    """
    left = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])
    right = DiscreteDistribution(sample_2, names=["X3", "X5", "X6", "X7"])

    product = left * right
    expected_names = ["X1", "X2", "X3", "X4", "X5", "X6", "X7"]
    assert all(compare(product.names, expected_names))
    assert product.total == (36 + 164) * 64 + (100 + 51) * 58

    matched = ("a", "x", 1, 33, "high", "normal", "x")
    assert product.frequency(matched) == 2
    assert product.probability(matched) == 2 / 21558
    assert product[matched] == 2

    unmatched_rows = [
        # the right operand does not have the common part
        ("b", "y", 2, 44, "high", "over", "y"),
        # the left operand does not have the common part
        ("b", "y", 2, 33, "high", "normal", "y"),
    ]
    for row in unmatched_rows:
        assert product.frequency(row) == 0
        assert product.probability(row) == 0
        assert product[row] == 0
def test_conditional_operator_discrete_distribution():
    """Check the ``|`` operator on a four-variable distribution.

    Conditions first on "X2" alone and then on ("X2", "X3"), verifying the
    conditioning names, the names of the per-level distributions, and a few
    conditional frequencies and probabilities.
    """
    # Sixteen keys with counts 1..16; the third component varies fastest,
    # then the fourth, then the second, then the first.
    samples = {}
    for first in ("a", "b"):
        for second in ("x", "y"):
            for fourth in (33, 44):
                for third in (1, 2):
                    samples[(first, second, third, fourth)] = len(samples) + 1

    disc_dist = DiscreteDistribution(samples)

    # Conditioning on a single variable.
    given_x2 = disc_dist | "X2"
    assert all(compare(given_x2.conditional_rvs.names, ["X2"]))
    for level in ("x", "y"):
        assert all(
            compare(given_x2.distributions[level].names, ["X1", "X3", "X4"]))

    key = ("a", 1, 33)
    assert given_x2.frequency(key, "x") == 1
    assert given_x2.frequency(key, "y") == 5

    # 52 and 84 are the sums of the counts whose second component is
    # "x" and "y" respectively.
    assert given_x2.probability(key, "x") == 1 / 52
    assert given_x2.probability(key, "y") == 5 / 84

    # Conditioning on two variables: levels are (X2, X3) tuples.
    given_x2_x3 = disc_dist | ("X2", "X3")
    assert all(compare(given_x2_x3.conditional_rvs.names, ["X2", "X3"]))
    for level in (("x", 1), ("x", 2), ("y", 1), ("y", 2)):
        assert all(
            compare(given_x2_x3.distributions[level].names, ["X1", "X4"]))
    # 24 = 1 + 3 + 9 + 11, the counts with X2 == "x" and X3 == 1.
    assert given_x2_x3.frequency(("a", 33), ("x", 1)) == 1
    assert given_x2_x3.probability(("a", 33), ("x", 1)) == 1 / 24
Beispiel #12
0
def test_three_levels_discrete_distribution():
    """Check distributions keyed by three-element tuples.

    Each scenario builds a DiscreteDistribution from a small dict and then
    verifies, in this order: the levels of each variable, the number of
    variables, item lookups, normalised and raw frequencies, and ``prob``
    called with X1/X2/X3 keyword arguments.
    """
    fifteenths = [1 / 15, 2 / 15, 3 / 15, 4 / 15, 5 / 15]

    # Each entry: (samples, expected levels, [(key, count)],
    #              normalised frequencies, raw frequencies,
    #              [((X1, X2, X3), probability)])
    scenarios = [
        (
            {("A", "y", 1): 2},
            [["A"], ["y"], [1]],
            [(("A", "y", 1), 2), (("A", "x", 2), 0)],
            [1],
            [2],
            [(("A", "y", 1), 1), (("A", "y", 2), 0)],
        ),
        (
            {("A", "x", 1): 2, ("A", "y", 1): 2},
            [["A"], ["x", "y"], [1]],
            [(("A", "x", 1), 2), (("A", "y", 1), 2)],
            [0.5, 0.5],
            [2, 2],
            [(("A", "x", 1), 0.5), (("A", "y", 1), 0.5), (("A", "y", 2), 0)],
        ),
        (
            {("A", "x", 1): 2, ("B", "y", 2): 2},
            [["A", "B"], ["x", "y"], [1, 2]],
            [(("A", "x", 1), 2), (("B", "y", 2), 2), (("B", "y", 3), 0)],
            [0.5, 0.5],
            [2, 2],
            [(("A", "x", 1), 0.5), (("B", "y", 2), 0.5)],
        ),
        (
            {("A", "x", 1): 1, ("A", "y", 2): 2, ("B", "x", 1): 3},
            [["A", "B"], ["x", "y"], [1, 2]],
            [
                (("A", "x", 1), 1),
                (("A", "y", 2), 2),
                (("B", "x", 1), 3),
                # a bare string key is expected to look up as zero
                ("B", 0),
            ],
            [1 / 6, 2 / 6, 3 / 6],
            [1, 2, 3],
            [
                (("A", "x", 1), 1 / 6),
                (("A", "y", 2), 2 / 6),
                (("B", "x", 1), 3 / 6),
                (("B", "y", 2), 0),
            ],
        ),
        (
            {
                ("A", "x", 1): 1,
                ("A", "y", 2): 2,
                ("B", "x", 1): 3,
                ("B", "y", 2): 4,
            },
            [["A", "B"], ["x", "y"], [1, 2]],
            [
                (("A", "x", 1), 1),
                (("A", "y", 2), 2),
                (("B", "x", 1), 3),
                (("B", "y", 2), 4),
            ],
            [0.1, 0.2, 0.3, 0.4],
            [1, 2, 3, 4],
            [
                (("A", "x", 1), 1 / 10),
                (("A", "y", 2), 2 / 10),
                (("B", "x", 1), 3 / 10),
                (("B", "y", 2), 4 / 10),
            ],
        ),
        (
            {
                ("A", "x", 1): 1,
                ("A", "y", 2): 2,
                ("B", "x", 1): 3,
                ("B", "y", 2): 4,
                ("C", "y", 3): 5,
            },
            [["A", "B", "C"], ["x", "y"], [1, 2, 3]],
            [
                (("A", "x", 1), 1),
                (("A", "y", 2), 2),
                (("B", "x", 1), 3),
                (("B", "y", 2), 4),
                (("C", "y", 3), 5),
            ],
            fifteenths,
            [1, 2, 3, 4, 5],
            [
                (("A", "x", 1), 1 / 15),
                (("A", "y", 2), 2 / 15),
                (("B", "x", 1), 3 / 15),
                (("B", "y", 2), 4 / 15),
                (("C", "y", 3), 5 / 15),
            ],
        ),
        (
            {
                ("A", "x", 1): 1,
                ("A", "y", 2): 2,
                ("B", "x", 3): 3,
                ("B", "y", 3): 4,
                ("C", "z", 4): 5,
            },
            [["A", "B", "C"], ["x", "y", "z"], [1, 2, 3, 4]],
            [
                (("A", "x", 1), 1),
                (("A", "y", 2), 2),
                (("B", "x", 3), 3),
                (("B", "y", 3), 4),
                (("C", "z", 4), 5),
            ],
            fifteenths,
            [1, 2, 3, 4, 5],
            [
                (("A", "x", 1), 1 / 15),
                (("A", "y", 2), 2 / 15),
                (("B", "x", 3), 3 / 15),
                (("B", "y", 3), 4 / 15),
                (("C", "z", 4), 5 / 15),
            ],
        ),
    ]

    for samples, levels, items, normalised, raw, probabilities in scenarios:
        dist = DiscreteDistribution(samples)

        for actual_levels, expected_levels in zip(dist.levels(), levels):
            assert all(compare(actual_levels, expected_levels))
        assert dist.rvs.size == 3

        for key, count in items:
            assert dist[key] == count

        assert all(compare(dist.frequencies(normalised=True), normalised))
        assert all(compare(dist.frequencies(normalised=False), raw))

        for (first, second, third), expected in probabilities:
            assert dist.prob(X1=first, X2=second, X3=third) == expected
def test_product_exceptions_discrete_distribution():
    """Check that ``product`` raises ValueError for an integer operand."""
    dist = DiscreteDistribution(sample_1)
    integer_operand = 2
    with pytest.raises(ValueError):
        dist.product(integer_operand)
def test_statistical_independence_frequency_table():
    """Check P(x, y, z) = P(x) P(y) P(z) for independent factors.

    A joint distribution is built as the product of two distributions and a
    frequency table. It is then marginalised down to each single variable,
    the marginals are multiplied back together, and the result must assign
    the same probability to every key as the original joint. The check is
    run on raw counts and again after normalising the factors in place.

    Note: this only holds because every factor is itself statistically
    independent; each count below is a product of per-variable weights.
    """

    def product_of_marginals(joint):
        """Return the product of all single-variable marginals of ``joint``."""
        marginals = []
        for name in joint.names:
            # marginal() receives the names to sum out, i.e. every name
            # except the one being kept.
            names_except_one = list(set(joint.names) - {name})
            marginals.append(joint.marginal(*names_except_one))
        # np.product is deprecated and was removed in NumPy 2.0;
        # np.prod is the supported spelling of the same reduction.
        return np.prod(marginals)

    s_1 = {
        ("x", 1): 1 * 6,
        ("x", 2): 1 * 4,
        ("y", 1): 9 * 6,
        ("y", 2): 9 * 4,
    }
    dist1 = DiscreteDistribution(s_1, names=["X1", "X2"])

    # Per-variable weights; each count of s_2 is the product of one weight
    # from each variable. Iteration order (Y4 outermost, Y3 innermost)
    # fixes the insertion order of the keys.
    y1_weights = {1: 4, 2: 2}
    y2_weights = {"high": 3, "low": 2}
    y3_weights = {"under": 1, "normal": 2, "over": 3, "obese": 4}
    y4_weights = {"x": 1, "y": 3}
    s_2 = {
        (y1, y2, y3, y4): w1 * w2 * w3 * w4
        for y4, w4 in y4_weights.items()
        for y1, w1 in y1_weights.items()
        for y2, w2 in y2_weights.items()
        for y3, w3 in y3_weights.items()
    }
    dist2 = DiscreteDistribution(s_2, names=["Y1", "Y2", "Y3", "Y4"])
    freq_table3 = FrequencyTable({11: 2, 22: 4, 33: 3}, name="Z")

    # Raw counts: probabilities must match exactly.
    joint_dist = dist1 * dist2 * freq_table3
    joint_dist2 = product_of_marginals(joint_dist)

    for k1 in joint_dist:
        assert joint_dist.probability(k1) == joint_dist2.probability(k1)

    # Normalised factors: values are floats, so compare with a tolerance.
    dist1.normalise()
    dist2.normalise()
    freq_table3.normalise()

    joint_dist = dist1 * dist2 * freq_table3
    joint_dist2 = product_of_marginals(joint_dist)

    for k1 in joint_dist:
        assert joint_dist.probability(k1) == approx(
            joint_dist2.probability(k1), abs=1e-16)
        assert joint_dist[k1] == approx(joint_dist2[k1], abs=1e-16)
def test_reduce_by_name_discrete_distribution():
    """Check ``reduce`` with keyword arguments naming the fixed variables.

    The same three reductions are applied to a distribution with default
    names (X1..X4) and to one with custom names (Y, Z, W, X). Each result is
    checked for its remaining variable names, item lookups, frequencies and
    probabilities.
    """
    # Sixteen keys with counts 1..16; the third component varies fastest,
    # then the fourth, then the second, then the first.
    samples = {}
    for first in ("a", "b"):
        for second in ("x", "y"):
            for fourth in (33, 44):
                for third in (1, 2):
                    samples[(first, second, third, fourth)] = len(samples) + 1

    def check_reduction(dist, fixed, remaining, counts, total):
        """Reduce ``dist`` by ``fixed`` and verify the listed expectations."""
        reduced = dist.reduce(**fixed)
        assert reduced.rvs.size == len(remaining)
        assert all(compare(reduced.rvs.names, remaining))
        for key, count in counts:
            assert reduced[key] == count
        for key, count in counts:
            assert reduced.frequency(key) == count
        # total is the sum of the sample counts that match ``fixed``.
        for key, count in counts:
            assert reduced.probability(key) == count / total

    one_fixed = [(("a", 1, 33), 5), (("b", 2, 44), 16)]
    two_fixed = [(("a", 33), 5), (("b", 44), 15)]
    three_fixed = [("x", 11), ("y", 15)]

    # Default variable names.
    disc_dist = DiscreteDistribution(samples)
    check_reduction(disc_dist, {"X2": "y"}, ["X1", "X3", "X4"], one_fixed, 84)
    check_reduction(disc_dist, {"X2": "y", "X3": 1}, ["X1", "X4"], two_fixed,
                    40)
    check_reduction(disc_dist, {"X1": "b", "X3": 1, "X4": 44}, ["X2"],
                    three_fixed, 26)

    # Custom variable names.
    disc_dist = DiscreteDistribution(samples, names=["Y", "Z", "W", "X"])
    check_reduction(disc_dist, {"Z": "y"}, ["Y", "W", "X"], one_fixed, 84)
    check_reduction(disc_dist, {"Z": "y", "W": 1}, ["Y", "X"], two_fixed, 40)
    check_reduction(disc_dist, {"Y": "b", "W": 1, "X": 44}, ["Z"],
                    three_fixed, 26)
def test_marginals_discrete_distribution():
    # Single RV dist.
    with pytest.raises(ValueError):
        disc_dist = DiscreteDistribution({"A": 2, "B": 3, "C": 4})
        disc_dist.marginal("X1")

    # Two levels dist.
    samples = {(1, 1): 4, (1, 2): 4, (2, 1): 6, (2, 2): 6}
    disc_dist = DiscreteDistribution(samples)
    disc_dist2 = disc_dist.marginal("X1")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), [1, 2]))
    assert disc_dist2[1] == 10
    assert disc_dist2[2] == 10
    assert disc_dist2.probability(1) == 0.5
    assert disc_dist2.probability(2) == 0.5

    disc_dist2 = disc_dist.marginal("X2")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), [1, 2]))
    assert disc_dist2[1] == 8
    assert disc_dist2[2] == 12
    assert disc_dist2.probability(1) == 0.4
    assert disc_dist2.probability(2) == 0.6

    samples = {("a", "x"): 4, ("a", "y"): 4, ("b", "x"): 6, ("b", "y"): 6}
    disc_dist = DiscreteDistribution(samples)
    disc_dist2 = disc_dist.marginal("X1")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), ["x", "y"]))
    assert disc_dist2["x"] == 10
    assert disc_dist2["y"] == 10
    assert disc_dist2.probability("x") == 0.5
    assert disc_dist2.probability("y") == 0.5

    disc_dist2 = disc_dist.marginal("X1")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), ["x", "y"]))
    assert disc_dist2["x"] == 10
    assert disc_dist2["y"] == 10
    assert disc_dist2.probability("x") == 0.5
    assert disc_dist2.probability("y") == 0.5

    disc_dist2 = disc_dist.marginal("X2")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), ["a", "b"]))
    assert disc_dist2["a"] == 8
    assert disc_dist2["b"] == 12
    assert disc_dist2.probability("a") == 0.4
    assert disc_dist2.probability("b") == 0.6

    # Three levels dist.
    samples = {
        ("a", "x", 1): 4,
        ("a", "x", 2): 4,
        ("a", "y", 1): 6,
        ("a", "y", 2): 6,
        ("b", "x", 1): 8,
        ("b", "x", 2): 8,
        ("b", "y", 1): 10,
        ("b", "y", 2): 10,
    }
    disc_dist = DiscreteDistribution(samples)
    disc_dist2 = disc_dist.marginal("X1")
    assert disc_dist2.total == disc_dist.total
    assert all(
        compare(disc_dist2.keys_as_list(), [("x", 1), ("x", 2), ("y", 1),
                                            ("y", 2)]))
    assert disc_dist2[("x", 1)] == 12
    assert disc_dist2[("x", 2)] == 12
    assert disc_dist2[("y", 1)] == 16
    assert disc_dist2[("y", 2)] == 16
    assert disc_dist2.probability(("x", 1)) == 12 / 56
    assert disc_dist2.probability(("x", 2)) == 12 / 56
    assert disc_dist2.probability(("y", 1)) == 16 / 56
    assert disc_dist2.probability(("y", 2)) == 16 / 56

    disc_dist2 = disc_dist.marginal("X2")
    assert disc_dist2.total == disc_dist.total
    assert all(
        compare(disc_dist2.keys_as_list(), [("a", 1), ("a", 2), ("b", 1),
                                            ("b", 2)]))
    assert disc_dist2[("a", 1)] == 10
    assert disc_dist2[("a", 2)] == 10
    assert disc_dist2[("b", 1)] == 18
    assert disc_dist2[("b", 2)] == 18
    assert disc_dist2.probability(("a", 1)) == 10 / 56
    assert disc_dist2.probability(("a", 2)) == 10 / 56
    assert disc_dist2.probability(("b", 1)) == 18 / 56
    assert disc_dist2.probability(("b", 2)) == 18 / 56

    disc_dist2 = disc_dist.marginal("X3")
    assert disc_dist2.total == disc_dist.total
    assert all(
        compare(disc_dist2.keys_as_list(), [("a", "x"), ("a", "y"), ("b", "x"),
                                            ("b", "y")]))
    assert disc_dist2[("a", "x")] == 8
    assert disc_dist2[("a", "y")] == 12
    assert disc_dist2[("b", "x")] == 16
    assert disc_dist2[("b", "y")] == 20
    assert disc_dist2.probability(("a", "x")) == 8 / 56
    assert disc_dist2.probability(("a", "y")) == 12 / 56
    assert disc_dist2.probability(("b", "x")) == 16 / 56
    assert disc_dist2.probability(("b", "y")) == 20 / 56

    disc_dist2 = disc_dist.marginal("X1", "X2")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), [1, 2]))
    assert disc_dist2[1] == 28
    assert disc_dist2[2] == 28
    assert disc_dist2.probability(1) == 28 / 56
    assert disc_dist2.probability(2) == 28 / 56

    disc_dist2 = disc_dist.marginal("X1", "X3")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), ["x", "y"]))
    assert disc_dist2["x"] == 24
    assert disc_dist2["y"] == 32
    assert disc_dist2.probability("x") == 24 / 56
    assert disc_dist2.probability("y") == 32 / 56

    disc_dist2 = disc_dist.marginal("X2", "X3")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), ["a", "b"]))
    assert disc_dist2["a"] == 20
    assert disc_dist2["b"] == 36
    assert disc_dist2.probability("a") == 20 / 56
    assert disc_dist2.probability("b") == 36 / 56

    # Four levels dist.
    samples = {
        ("a", "x", 1, 33): 1,
        ("a", "x", 2, 33): 2,
        ("a", "x", 1, 44): 3,
        ("a", "x", 2, 44): 4,
        ("a", "y", 1, 33): 5,
        ("a", "y", 2, 33): 6,
        ("a", "y", 1, 44): 7,
        ("a", "y", 2, 44): 8,
        ("b", "x", 1, 33): 9,
        ("b", "x", 2, 33): 10,
        ("b", "x", 1, 44): 11,
        ("b", "x", 2, 44): 12,
        ("b", "y", 1, 33): 13,
        ("b", "y", 2, 33): 14,
        ("b", "y", 1, 44): 15,
        ("b", "y", 2, 44): 16,
    }
    disc_dist = DiscreteDistribution(samples)
    disc_dist2 = disc_dist.marginal("X3")
    assert disc_dist2.total == disc_dist.total
    assert all(
        compare(
            disc_dist2.keys_as_list(),
            [
                ("a", "x", 33),
                ("a", "x", 44),
                ("a", "y", 33),
                ("a", "y", 44),
                ("b", "x", 33),
                ("b", "x", 44),
                ("b", "y", 33),
                ("b", "y", 44),
            ],
        ))
    assert disc_dist2[("a", "x", 33)] == 3
    assert disc_dist2[("a", "x", 44)] == 7
    assert disc_dist2[("a", "y", 33)] == 11
    assert disc_dist2[("a", "y", 44)] == 15
    assert disc_dist2[("b", "x", 33)] == 19
    assert disc_dist2[("b", "x", 44)] == 23
    assert disc_dist2[("b", "y", 33)] == 27
    assert disc_dist2[("b", "y", 44)] == 31
    assert disc_dist2.probability(("a", "x", 33)) == 3 / 136
    assert disc_dist2.probability(("a", "x", 44)) == 7 / 136
    assert disc_dist2.probability(("a", "y", 33)) == 11 / 136
    assert disc_dist2.probability(("a", "y", 44)) == 15 / 136
    assert disc_dist2.probability(("b", "x", 33)) == 19 / 136
    assert disc_dist2.probability(("b", "x", 44)) == 23 / 136
    assert disc_dist2.probability(("b", "y", 33)) == 27 / 136
    assert disc_dist2.probability(("b", "y", 44)) == 31 / 136

    disc_dist2 = disc_dist.marginal("X4")
    assert disc_dist2.total == disc_dist.total
    assert all(
        compare(
            disc_dist2.keys_as_list(),
            [
                ("a", "x", 1),
                ("a", "x", 2),
                ("a", "y", 1),
                ("a", "y", 2),
                ("b", "x", 1),
                ("b", "x", 2),
                ("b", "y", 1),
                ("b", "y", 2),
            ],
        ))
    assert disc_dist2[("a", "x", 1)] == 4
    assert disc_dist2[("a", "x", 2)] == 6
    assert disc_dist2[("a", "y", 1)] == 12
    assert disc_dist2[("a", "y", 2)] == 14
    assert disc_dist2[("b", "x", 1)] == 20
    assert disc_dist2[("b", "x", 2)] == 22
    assert disc_dist2[("b", "y", 1)] == 28
    assert disc_dist2[("b", "y", 2)] == 30
    assert disc_dist2.probability(("a", "x", 1)) == 4 / 136
    assert disc_dist2.probability(("a", "x", 2)) == 6 / 136
    assert disc_dist2.probability(("a", "y", 1)) == 12 / 136
    assert disc_dist2.probability(("a", "y", 2)) == 14 / 136
    assert disc_dist2.probability(("b", "x", 1)) == 20 / 136
    assert disc_dist2.probability(("b", "x", 2)) == 22 / 136
    assert disc_dist2.probability(("b", "y", 1)) == 28 / 136
    assert disc_dist2.probability(("b", "y", 2)) == 30 / 136

    disc_dist2 = disc_dist.marginal("X1", "X4")
    assert disc_dist2.total == disc_dist.total
    assert all(
        compare(disc_dist2.keys_as_list(), [("x", 1), ("x", 2), ("y", 1),
                                            ("y", 2)]))
    assert disc_dist2[("x", 1)] == 24
    assert disc_dist2[("x", 2)] == 28
    assert disc_dist2[("y", 1)] == 40
    assert disc_dist2[("y", 2)] == 44
    assert disc_dist2.probability(("x", 1)) == 24 / 136
    assert disc_dist2.probability(("x", 2)) == 28 / 136
    assert disc_dist2.probability(("y", 1)) == 40 / 136
    assert disc_dist2.probability(("y", 2)) == 44 / 136

    disc_dist2 = disc_dist.marginal("X1", "X2", "X4")
    assert disc_dist2.total == disc_dist.total
    assert all(compare(disc_dist2.keys_as_list(), [1, 2]))
    assert disc_dist2[1] == 64
    assert disc_dist2[2] == 72
    assert disc_dist2.probability(1) == 64 / 136
    assert disc_dist2.probability(2) == 72 / 136

    # marginalize two times
    disc_dist2 = disc_dist.marginal("X1", "X4")
    disc_dist3 = disc_dist2.marginal("X2")
    assert disc_dist3.total == disc_dist.total
    assert all(compare(disc_dist3.keys_as_list(), [1, 2]))
    assert disc_dist3[1] == 64
    assert disc_dist3[2] == 72
    assert disc_dist3.probability(1) == 64 / 136
    assert disc_dist3.probability(2) == 72 / 136

    # marginalize three times
    disc_dist2 = disc_dist.marginal("X4")
    disc_dist3 = disc_dist2.marginal("X3")
    disc_dist4 = disc_dist3.marginal("X2")
    assert disc_dist4.total == disc_dist.total
    assert all(compare(disc_dist4.keys_as_list(), ["a", "b"]))
    assert disc_dist4["a"] == 36
    assert disc_dist4["b"] == 100
    assert disc_dist4.probability("a") == 36 / 136
    assert disc_dist4.probability("b") == 100 / 136
Beispiel #17
0
def test_avg_discrete_distribution():
    """Check ``avg`` and ``std`` per column of a three-column distribution.

    Every key starts with 1, the second component (1, 2 or 3) doubles as the
    key's frequency, and the third component runs through 1, 2, 3.

    NOTE(review): the expected ``std`` values (0.5556 and 0.6667) equal the
    frequency-weighted *variances* of columns 1 and 2, not their square
    roots -- confirm that this is what ``std`` is meant to return.
    """
    levels = (1, 2, 3)
    samples = {(1, second, third): second
               for second in levels for third in levels}
    dist = DiscreteDistribution(samples)

    # In column 1 the value v carries a total frequency of 3 * v, and the
    # frequencies of all nine keys add up to 18.
    second_mean = (3 + 12 + 27) / 18
    column_means = [1, second_mean, 2]

    # Called without arguments, ``avg`` is expected to cover every column.
    assert all(compare(dist.avg(), column_means))

    # ``indices`` picks the columns and fixes the order of the result.
    for picked in ([0, 1, 2], [0, 2, 1], [0, 1], [0, 2], [2, 0], [1, 2]):
        wanted = [column_means[i] for i in picked]
        assert all(compare(dist.avg(indices=picked), wanted))

    # With a single index the result is compared directly against a scalar.
    single_column = (
        (0, 1, 0),
        (1, second_mean, approx(0.55555555555556)),
        (2, 2, approx(0.66666666666667)),
    )
    for column, mean, spread in single_column:
        assert dist.avg(indices=[column]) == mean
        assert dist.std(indices=[column]) == spread
Beispiel #18
0
def test_keys_consistencies_discrete_distribution():
    """``consistencies=True`` must reject samples whose keys disagree.

    Every sample list below mixes key types, key lengths or ``None`` values
    across its rows, and each one is expected to make the constructor raise
    ``ValueError``.
    """
    inconsistent = (
        (
            ["X1"],
            [
                # Scalar keys of mixed type.
                [1, 2, 3, "A"],
                ["A", 1, 2, 3],
                # Tuple keys of differing length.
                [(1, ), (2, ), (3, ), (4, 5)],
                [(4, 5), (1, ), (2, ), (3, )],
                [(4, 5), (1, 3), (2, 3, 4), (3, 7)],
            ],
        ),
        (
            ["X1", "X2", "X3"],
            [
                # A str where the other rows hold an int, at varying rows.
                [("a", "1", "w1"), ("b", 2, "w1"), ("c", 3, "w2"),
                 ("d", 4, "w2")],
                [("a", 1, "w1"), ("b", "2", "w1"), ("c", 3, "w2"),
                 ("d", 4, "w2")],
                [("a", 1, "w1"), ("b", 2, "w1"), ("c", 3, "w2"),
                 ("d", "4", "w2")],
                # A row that is too short.
                [("a", 1, "w1"), ("b", 2), ("c", 3, "w2"), ("d", "4", "w2")],
                [(1, "w1"), ("b", 2, "w1"), ("c", 3, "w2"), ("d", "4", "w2")],
                # A None component.
                [("a", None, "w1"), ("b", 2, "w1"), ("c", 3, "w2"),
                 ("d", "4", "w2")],
            ],
        ),
        (
            ["X1", "X2", "X3", "X4"],
            [
                # None components.
                [("a", 1, "w1", None), ("b", 2, "w1", 2), ("c", 3, "w2", 1)],
                [("a", 1, "w1", 4), ("b", None, "w1", 2), ("c", 3, "w2", 1)],
                # Type mismatches in different columns and rows.
                [("a", 1, "w1", "4"), ("b", 2, "w1", 2), ("c", 3, "w2", 1)],
                [("a", 1, 1, 4), ("b", 2, "w1", 2), ("c", 3, "w2", 1)],
                [("a", 1, "w1", 4), ("b", "2", "w1", 2), ("c", 3, "w2", 1)],
                [("a", 1, "w1", 4), (1, 2, "w1", 2), ("c", 3, "w2", 1)],
                [("a", 1, "w1", 4), ("b", 2, "w1", 2), ("c", "3", "w2", "1")],
                # A row that is too short.
                [("a", 1, "w1", 4), ("b", 2, 2), ("c", 3, "w2", 1)],
            ],
        ),
    )

    for names, sample_lists in inconsistent:
        for samples in sample_lists:
            with pytest.raises(ValueError):
                # Hand over a fresh names list for every construction.
                DiscreteDistribution(samples, list(names), consistencies=True)
def test_marginal_by_name_discrete_distribution():
    """Marginalise a four-variable distribution by custom variable names.

    The variables are named Age, Sex, Edu and Etn.  Each scenario drops one
    or more of them (possibly in several successive ``marginal`` calls) and
    then checks the total, the remaining keys, the summed frequencies and
    the probabilities of the result.
    """
    # Four levels dist.: frequencies 1..16, with Edu varying fastest, then
    # Etn, then Sex, then Age.
    samples = {}
    for age in ("a", "b"):
        for sex in ("x", "y"):
            for etn in (33, 44):
                for edu in (1, 2):
                    samples[(age, sex, edu, etn)] = len(samples) + 1
    disc_dist = DiscreteDistribution(samples,
                                     names=["Age", "Sex", "Edu", "Etn"])

    # Each scenario: (successive marginal() argument tuples,
    #                 expected key -> frequency, in expected key order)
    scenarios = [
        (
            [("Edu", )],
            {
                ("a", "x", 33): 3,
                ("a", "x", 44): 7,
                ("a", "y", 33): 11,
                ("a", "y", 44): 15,
                ("b", "x", 33): 19,
                ("b", "x", 44): 23,
                ("b", "y", 33): 27,
                ("b", "y", 44): 31,
            },
        ),
        (
            [("Etn", )],
            {
                ("a", "x", 1): 4,
                ("a", "x", 2): 6,
                ("a", "y", 1): 12,
                ("a", "y", 2): 14,
                ("b", "x", 1): 20,
                ("b", "x", 2): 22,
                ("b", "y", 1): 28,
                ("b", "y", 2): 30,
            },
        ),
        (
            [("Age", "Etn")],
            {("x", 1): 24, ("x", 2): 28, ("y", 1): 40, ("y", 2): 44},
        ),
        ([("Age", "Sex", "Etn")], {1: 64, 2: 72}),
        # marginalize two times
        ([("Age", "Etn"), ("Sex", )], {1: 64, 2: 72}),
        # marginalize three times
        ([("Etn", ), ("Edu", ), ("Sex", )], {"a": 36, "b": 100}),
    ]

    for steps, expected in scenarios:
        reduced = disc_dist
        for dropped in steps:
            reduced = reduced.marginal(*dropped)

        # Marginalising must not lose any observations.
        assert reduced.total == disc_dist.total
        assert all(compare(reduced.keys_as_list(), list(expected)))
        for key, frequency in expected.items():
            assert reduced[key] == frequency
        # 136 is the sum of the sample frequencies 1..16.
        for key, frequency in expected.items():
            assert reduced.probability(key) == frequency / 136
Beispiel #20
0
 def from_multilevels_sample(cls, samples, names=None):
     """Build a ``cls`` instance from multi-level samples.

     The samples (and the optional variable names) are first wrapped in a
     ``DiscreteDistribution``, which is then handed to ``cls`` as its only
     argument.

     NOTE(review): takes ``cls``, so presumably decorated as a classmethod;
     the decorator and the enclosing class are outside this excerpt.
     """
     return cls(DiscreteDistribution(samples, names))
Beispiel #21
0
def test_one_levels_discrete_distribution():
    """Exercise single-variable distributions with one, two and three keys.

    For each distribution the test checks the key list, the number of random
    variables, item lookup (including a key that was never observed), raw
    and normalised frequencies, and ``prob`` called both positionally and
    with the default variable name ``X1``.
    """
    # (frequency per key, expected probabilities, unseen key,
    #  whether prob() is also checked for the unseen key)
    scenarios = (
        ({"Dog": 2}, [1], "Cat", False),
        ({"Dog": 2, "Cat": 3}, [2 / 5, 3 / 5], "Dolphin", True),
        ({"Dog": 2, "Cat": 3, "Dolphin": 4}, [2 / 9, 3 / 9, 4 / 9], "Tiger",
         True),
    )

    for counts, probabilities, unseen, check_unseen_prob in scenarios:
        labels = list(counts)
        frequencies = list(counts.values())
        # Build from a private copy so the expectations above stay untouched.
        dist = DiscreteDistribution(dict(counts))

        assert all(compare(dist.keys_as_list(), labels))
        assert dist.rvs.size == 1

        for label, frequency in zip(labels, frequencies):
            assert dist[label] == frequency
        # Looking up a key that was never observed gives a zero frequency.
        assert dist[unseen] == 0

        assert all(compare(dist.frequencies(normalised=True), probabilities))
        assert all(compare(dist.frequencies(normalised=False), frequencies))

        for label, probability in zip(labels, probabilities):
            assert dist.prob(label) == probability
            assert dist.prob(X1=label) == probability

        if check_unseen_prob:
            assert dist.prob(unseen) == 0
            assert dist.prob(X1=unseen) == 0
def test_marginals_names_exception_discrete_distribution():
    """``marginal`` must raise ``ValueError`` for names it cannot drop.

    Only the call that is expected to fail sits inside ``pytest.raises``.
    Building the distribution, and any preparatory ``marginal`` call, happens
    outside the context manager so that a ``ValueError`` raised during setup
    fails the test instead of being mistaken for the expected error.
    """

    def two_level_dist(**kwargs):
        # Fresh two-variable distribution; ``names`` may be passed through.
        samples = {("a", "x"): 4, ("a", "y"): 4, ("b", "x"): 6, ("b", "y"): 6}
        return DiscreteDistribution(samples, **kwargs)

    # Single-variable distribution.
    # NOTE(review): "X1" is presumably the default name of its only variable,
    # so this looks like the "marginalize over all vars" case rather than a
    # wrong name -- confirm.
    disc_dist = DiscreteDistribution({"a": 3, "b": 4, "c": 5})
    with pytest.raises(ValueError):
        disc_dist.marginal("X1")

    # Wrong rv name: neither "X0" nor "X3" is among the default names
    # "X1" and "X2" of a two-variable distribution.
    for unknown in ("X0", "X3"):
        disc_dist = two_level_dist()
        with pytest.raises(ValueError):
            disc_dist.marginal(unknown)

    # Wrong rv name: "X1" is gone once it has been marginalized out.
    remaining = two_level_dist().marginal("X1")
    with pytest.raises(ValueError):
        remaining.marginal("X1")

    # Wrong rv name: a default name is unknown when custom names were given.
    disc_dist = two_level_dist(names=["Y", "Z"])
    with pytest.raises(ValueError):
        disc_dist.marginal("X1")

    # Wrong rv name: "Y" is gone once it has been marginalized out.
    remaining = two_level_dist(names=["Y", "Z"]).marginal("Y")
    with pytest.raises(ValueError):
        remaining.marginal("Y")

    # Marginalize over all vars
    disc_dist = two_level_dist(names=["Y", "Z"])
    with pytest.raises(ValueError):
        disc_dist.marginal("Y", "Z")
def test_marginals_names_discrete_distribution():
    """Check the ``names`` left on a distribution after ``marginal``.

    Covers a two-variable and a three-variable distribution, each once with
    the default names (``X1``, ``X2``, ...) and once with names supplied by
    the caller.  The remaining names are expected to keep their original
    relative order.
    """
    # Two levels dist. with default names
    samples = {("a", "x"): 4, ("a", "y"): 4, ("b", "x"): 6, ("b", "y"): 6}
    disc_dist = DiscreteDistribution(samples)

    disc_dist2 = disc_dist.marginal("X1")
    assert all(compare(disc_dist2.names, ["X2"]))

    disc_dist2 = disc_dist.marginal("X2")
    assert all(compare(disc_dist2.names, ["X1"]))
    # Same samples, this time with caller-supplied names
    disc_dist = DiscreteDistribution(samples, names=["Y", "Z"])

    disc_dist2 = disc_dist.marginal("Y")
    assert all(compare(disc_dist2.names, ["Z"]))

    disc_dist2 = disc_dist.marginal("Z")
    assert all(compare(disc_dist2.names, ["Y"]))

    # Three levels dist.
    samples = {
        ("a", "x", 1): 4,
        ("a", "x", 2): 4,
        ("a", "y", 1): 6,
        ("a", "y", 2): 6,
        ("b", "x", 1): 8,
        ("b", "x", 2): 8,
        ("b", "y", 1): 10,
        ("b", "y", 2): 10,
    }

    # Default names first
    disc_dist = DiscreteDistribution(samples)

    # Dropping a single variable leaves the other two
    disc_dist2 = disc_dist.marginal("X1")
    assert all(compare(disc_dist2.names, ["X2", "X3"]))

    disc_dist2 = disc_dist.marginal("X2")
    assert all(compare(disc_dist2.names, ["X1", "X3"]))

    disc_dist2 = disc_dist.marginal("X3")
    assert all(compare(disc_dist2.names, ["X1", "X2"]))

    # Dropping two variables in one call leaves a single name
    disc_dist2 = disc_dist.marginal("X1", "X3")
    assert all(compare(disc_dist2.names, ["X2"]))

    disc_dist2 = disc_dist.marginal("X2", "X3")
    assert all(compare(disc_dist2.names, ["X1"]))

    # Same three-level samples, this time with caller-supplied names
    disc_dist = DiscreteDistribution(samples, names=["Y", "Z", "W"])

    disc_dist2 = disc_dist.marginal("Y")
    assert all(compare(disc_dist2.names, ["Z", "W"]))

    disc_dist2 = disc_dist.marginal("Z")
    assert all(compare(disc_dist2.names, ["Y", "W"]))

    disc_dist2 = disc_dist.marginal("W")
    assert all(compare(disc_dist2.names, ["Y", "Z"]))

    disc_dist2 = disc_dist.marginal("Y", "W")
    assert all(compare(disc_dist2.names, ["Z"]))

    disc_dist2 = disc_dist.marginal("Z", "W")
    assert all(compare(disc_dist2.names, ["Y"]))