def test_product_with_single_column_table():
    """Check `Table` products in which one operand has a single column.

    Both operand orders are exercised, first when the single column's name
    ("Y1") is new and then when it matches an existing column ("X2").
    """
    # --- single column with a name the four-column table does not have ---
    y1_column = Table({"A": 3, "B": 4, "C": 7}, names=["Y1"])
    base = Table(sample_1, names=["X1", "X2", "X3", "X4"])

    product = base * y1_column
    assert all(compare(product.names, ["X1", "X2", "X3", "X4", "Y1"]))
    # value stored for one combined key
    assert product[("a", "y", 2, 33, "B")] == 24

    product = y1_column * base
    assert all(compare(product.names, ["Y1", "X1", "X2", "X3", "X4"]))
    # same combination, key order follows the operand order
    assert product[("B", "a", "y", 2, 33)] == 24

    # --- single column sharing the name "X2" with the other table ---
    x2_column = Table({"x": 3, "y": 4}, names=["X2"])
    base = Table(sample_1, names=["X1", "X2", "X3", "X4"])

    product = base * x2_column
    assert all(compare(product.names, ["X1", "X2", "X3", "X4"]))
    assert product["a", "x", 2, 44] == 12
    assert product["a", "y", 2, 44] == 32

    product = x2_column * base
    assert all(compare(product.names, ["X2", "X1", "X3", "X4"]))
def test_product_with_frequency_table_discrete_distribution():
    """Check products of a `DiscreteDistribution` with a `FrequencyTable`.

    Both operand orders are exercised, first when the frequency table's
    name ("Y1") is new and then when it equals an existing name ("X2").
    """
    # --- frequency table with a new name ---
    y1_freq = FrequencyTable({"A": 3, "B": 4, "C": 7}, name="Y1")
    joint = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])

    product = joint * y1_freq
    assert all(compare(product.names, ["X1", "X2", "X3", "X4", "Y1"]))
    assert product.total == (joint.total * y1_freq.total)
    # frequency, probability and item access for one combined key
    key = ("a", "y", 2, 33, "B")
    assert product.frequency(key) == 6 * 4
    assert product.probability(key) == 24 / 1708
    assert product[key] == 24

    product = y1_freq * joint
    assert all(compare(product.names, ["Y1", "X1", "X2", "X3", "X4"]))
    assert product.total == (joint.total * y1_freq.total)
    # same combination with the frequency-table level leading the key
    key = ("B", "a", "y", 2, 33)
    assert product.frequency(key) == 6 * 4
    assert product.probability(key) == 24 / 1708
    assert product[key] == 24

    # --- frequency table named like an existing variable ("X2") ---
    x2_freq = FrequencyTable({"x": 3, "y": 4}, name="X2")
    joint = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])

    product = joint * x2_freq
    assert all(compare(product.names, ["X1", "X2", "X3", "X4"]))
    assert product.total == 52 * 3 + 70 * 4

    product = x2_freq * joint
    assert all(compare(product.names, ["X2", "X1", "X3", "X4"]))
    assert product.total == 52 * 3 + 70 * 4
def test_conditional_on_table():
    """Check `Table.condition_on("X2")`: names, child names and child values."""
    source = Table(samples, names=["X1", "X2", "X3", "X4"])
    conditioned = source.condition_on("X2")
    assert all(compare(conditioned.names, ["X2"]))

    # every child table keeps the three remaining columns
    for x2 in conditioned.keys():
        assert all(compare(conditioned[x2].names, ["X1", "X3", "X4"]))

    given_x = conditioned["x"]
    given_y = conditioned["y"]
    # (child table, key, expected value); 52 and 84 are the divisors used
    # for the "x" and "y" children respectively
    expectations = [
        (given_x, ("a", 1, 33), 1 / 52),
        (given_y, ("a", 1, 33), 5 / 84),
        (given_x, ("a", 1, 44), 3 / 52),
        (given_y, ("a", 1, 44), 7 / 84),
        (given_x, ("b", 1, 33), 9 / 52),
        (given_y, ("b", 1, 33), 13 / 84),
        (given_x, ("b", 1, 44), 11 / 52),
        (given_y, ("b", 1, 44), 15 / 84),
        (given_x, ("b", 2, 44), 12 / 52),
        (given_y, ("b", 2, 33), 14 / 84),
    ]
    for child, key, expected in expectations:
        assert child[key] == expected

    # chained indexing straight from the conditioned table
    assert conditioned["x"]["a", 1, 33] == 1 / 52
    assert conditioned["y"]["a", 1, 33] == 5 / 84
def test_conditional_discrete_distribution():
    """Check `DiscreteDistribution.condition_on("X2")` on a four-variable sample.

    Verifies the conditional variable names, the names of each conditional
    distribution, and a selection of frequencies and probabilities.
    """
    # Four-variable sample with counts 1..16.
    samples = {
        ("a", "x", 1, 33): 1, ("a", "x", 2, 33): 2,
        ("a", "x", 1, 44): 3, ("a", "x", 2, 44): 4,
        ("a", "y", 1, 33): 5, ("a", "y", 2, 33): 6,
        ("a", "y", 1, 44): 7, ("a", "y", 2, 44): 8,
        ("b", "x", 1, 33): 9, ("b", "x", 2, 33): 10,
        ("b", "x", 1, 44): 11, ("b", "x", 2, 44): 12,
        ("b", "y", 1, 33): 13, ("b", "y", 2, 33): 14,
        ("b", "y", 1, 44): 15, ("b", "y", 2, 44): 16,
    }
    joint = DiscreteDistribution(samples)
    conditional = joint.condition_on("X2")

    assert all(compare(conditional.conditional_rvs.names, ["X2"]))
    assert all(
        compare(conditional.distributions["x"].names, ["X1", "X3", "X4"]))
    assert all(
        compare(conditional.distributions["y"].names, ["X1", "X3", "X4"]))

    # (key, conditioning value, expected count)
    cases = [
        (("a", 1, 33), "x", 1),
        (("a", 1, 33), "y", 5),
        (("a", 1, 44), "x", 3),
        (("a", 1, 44), "y", 7),
        (("b", 1, 33), "x", 9),
        (("b", 1, 33), "y", 13),
        (("b", 1, 44), "x", 11),
        (("b", 1, 44), "y", 15),
        (("b", 2, 44), "x", 12),
        (("b", 2, 33), "y", 14),
    ]
    for key, given, count in cases:
        assert conditional.frequency(key, given) == count

    # totals of the "x" and "y" slices of the sample above
    slice_total = {"x": 52, "y": 84}
    for key, given, count in cases:
        assert conditional.probability(key, given) == count / slice_total[given]
def test_marginals_operator_discrete_distribution():
    """Check the `<<` operator of `DiscreteDistribution`.

    The same checks run twice: with the default names (X1..X4) and with
    custom names. Each result must keep the original total and expose the
    expected remaining keys.
    """
    # Four-variable sample with counts 1..16.
    samples = {
        ("a", "x", 1, 33): 1, ("a", "x", 2, 33): 2,
        ("a", "x", 1, 44): 3, ("a", "x", 2, 44): 4,
        ("a", "y", 1, 33): 5, ("a", "y", 2, 33): 6,
        ("a", "y", 1, 44): 7, ("a", "y", 2, 44): 8,
        ("b", "x", 1, 33): 9, ("b", "x", 2, 33): 10,
        ("b", "x", 1, 44): 11, ("b", "x", 2, 44): 12,
        ("b", "y", 1, 33): 13, ("b", "y", 2, 33): 14,
        ("b", "y", 1, 44): 15, ("b", "y", 2, 44): 16,
    }

    # --- default names ---
    disc_dist = DiscreteDistribution(samples)
    for operand in ("X2", ("X2", "X3"), ("X2", "X3", "X4")):
        assert (disc_dist << operand).total == disc_dist.total
    key_cases = [
        (("X1", "X2", "X4"), [1, 2]),
        (("X1", "X2", "X3"), [33, 44]),
        (("X2", "X3", "X4"), ["a", "b"]),
        (("X2", "X3"), [("a", 33), ("a", 44), ("b", 33), ("b", 44)]),
    ]
    for operand, expected_keys in key_cases:
        assert all(
            compare((disc_dist << operand).keys_as_list(), expected_keys))

    # --- custom names ---
    disc_dist = DiscreteDistribution(
        samples, names=["Age", "Sex", "Education", "City"])
    for operand in ("Age", ("Sex", "Education"), ("Sex", "Education", "City")):
        assert (disc_dist << operand).total == disc_dist.total
    key_cases = [
        (("Age", "Sex", "City"), [1, 2]),
        (("Age", "Sex", "Education"), [33, 44]),
        (("Sex", "Education", "City"), ["a", "b"]),
        (("Sex", "Education"), [("a", 33), ("a", 44), ("b", 33), ("b", 44)]),
    ]
    for operand, expected_keys in key_cases:
        assert all(
            compare((disc_dist << operand).keys_as_list(), expected_keys))
def test_avg_discrete_distribution():
    """Check `avg` and `std` of a three-variable `DiscreteDistribution`.

    `avg` is checked for all variables and for several `indices` selections
    (including reordered ones); `std` is checked for single indices.
    """
    # Keys (1, j, k) for j, k in 1..3, each with count j (total 18).
    samples = {
        (1, second, third): second
        for second in (1, 2, 3)
        for third in (1, 2, 3)
    }
    dist = DiscreteDistribution(samples)

    # weighted mean of the second variable
    mean_second = (3 + 12 + 27) / 18

    assert all(compare(dist.avg(), [1, mean_second, 2]))
    vector_cases = [
        ([0, 1, 2], [1, mean_second, 2]),
        ([0, 2, 1], [1, 2, mean_second]),
        ([0, 1], [1, mean_second]),
        ([0, 2], [1, 2]),
        ([2, 0], [2, 1]),
        ([1, 2], [mean_second, 2]),
    ]
    for indices, expected in vector_cases:
        assert all(compare(dist.avg(indices=indices), expected))

    # NOTE(review): the expected `std` values below (5/9 and 2/3) equal the
    # weighted variances of the second and third variables, not their square
    # roots -- confirm what `std` is meant to return.
    assert dist.avg(indices=[0]) == 1
    assert dist.std(indices=[0]) == 0
    assert dist.avg(indices=[1]) == mean_second
    assert dist.std(indices=[1]) == approx(0.55555555555556)
    assert dist.avg(indices=[2]) == 2
    assert dist.std(indices=[2]) == approx(0.66666666666667)
def test_nap_exception():
    """Check `str()` and `to_dict()` of `NapException`.

    Covers construction with no arguments, with a message, and with a
    message plus extra keyword values.
    """
    # NOTE(review): the result of `compare` is not asserted; presumably it
    # raises on mismatch -- confirm against the imported helper.
    bare = NapException()
    compare(str(bare), u"{'message': 'NapException'}")

    with_message = NapException('A nice message')
    compare(str(with_message), u"{'message': 'A nice message'}")

    with_extras = NapException(
        'A nice message', some='some123', values='values321')
    # the string form is compared verbatim, including key order
    compare(
        str(with_extras),
        u"{'some': 'some123', 'message': 'A nice message', 'values': 'values321'}",
    )
    compare(
        with_extras.to_dict(),
        {'some': 'some123', 'message': 'A nice message', 'values': 'values321'},
    )
def test_one_levels_discrete_distribution():
    """Check a single-variable `DiscreteDistribution` with 1, 2 and 3 levels.

    For each distribution: keys, number of variables, item access (missing
    levels give 0), frequencies, and `prob` by position and by name ("X1").
    """
    # --- one level ---
    one = DiscreteDistribution({"Dog": 2})
    assert all(compare(one.keys_as_list(), ["Dog"]))
    assert one.rvs.size == 1
    assert one["Dog"] == 2
    assert one["Cat"] == 0
    assert all(compare(one.frequencies(normalised=True), [1]))
    assert all(compare(one.frequencies(normalised=False), [2]))
    assert one.prob("Dog") == 1
    assert one.prob(X1="Dog") == 1

    # --- two levels ---
    two = DiscreteDistribution({"Dog": 2, "Cat": 3})
    assert all(compare(two.keys_as_list(), ["Dog", "Cat"]))
    assert two.rvs.size == 1
    for animal, count in (("Dog", 2), ("Cat", 3), ("Dolphin", 0)):
        assert two[animal] == count
    assert all(compare(two.frequencies(normalised=True), [2 / 5, 3 / 5]))
    assert all(compare(two.frequencies(normalised=False), [2, 3]))
    for animal, expected in (("Dog", 2 / 5), ("Cat", 3 / 5), ("Dolphin", 0)):
        assert two.prob(animal) == expected
        assert two.prob(X1=animal) == expected

    # --- three levels ---
    three = DiscreteDistribution({"Dog": 2, "Cat": 3, "Dolphin": 4})
    assert all(compare(three.keys_as_list(), ["Dog", "Cat", "Dolphin"]))
    assert three.rvs.size == 1
    for animal, count in (("Dog", 2), ("Cat", 3), ("Dolphin", 4), ("Tiger", 0)):
        assert three[animal] == count
    assert all(
        compare(three.frequencies(normalised=True), [2 / 9, 3 / 9, 4 / 9]))
    assert all(compare(three.frequencies(normalised=False), [2, 3, 4]))
    prob_cases = (
        ("Dog", 2 / 9),
        ("Cat", 3 / 9),
        ("Dolphin", 4 / 9),
        ("Tiger", 0),
    )
    for animal, expected in prob_cases:
        assert three.prob(animal) == expected
        assert three.prob(X1=animal) == expected
def test_reduce_by_name_on_conditioned_table():
    """Check `reduce(**names)` on tables produced by `condition_on`.

    The column count, the children names and a few child values are
    verified for one and two conditioning columns.
    """
    source = Table(samples, names=["X1", "X2", "X3", "X4"])

    # --- conditioned on X1 only ---
    by_x1 = source.condition_on("X1")

    reduced = by_x1.reduce(X2="y")
    assert reduced.columns.size == 1
    assert all(compare(reduced.columns.children_names, ["X3", "X4"]))
    assert reduced["a"][1, 33] == 5 / 36
    assert reduced["b"][(2, 44)] == 16 / 100

    reduced = by_x1.reduce(X2="y", X3=1)
    assert reduced.columns.size == 1
    assert all(compare(reduced.columns.children_names, ["X4"]))
    assert reduced["a"][33] == 5 / 36
    assert reduced["b"][44] == 15 / 100

    # --- conditioned on X1 and X3 ---
    by_x1_x3 = source.condition_on("X1", "X3")

    reduced = by_x1_x3.reduce(X2="y")
    assert reduced.columns.size == 2
    assert all(compare(reduced.columns.children_names, ["X4"]))
    assert reduced["a", 1][33] == 5 / 16
    assert reduced["b", 2][44] == 16 / 52
def test_product_with_two_common_vars_table():
    """Check the product of two `Table`s sharing the columns X2 and X3."""
    left = Table(sample_1, names=["X1", "X2", "X3", "X4"])
    right = Table(sample_2, names=["X3", "X5", "X6", "X2"])

    product = left * right
    assert all(compare(product.names, ["X1", "X2", "X3", "X4", "X5", "X6"]))

    # a combination present on both sides
    assert product[("a", "y", 2, 33, "high", "under")] == 150
    # the right operand has no match for the shared values -> None
    assert product[("a", "y", 2, 33, "low", "under")] is None
    # the left operand has no match for the shared values -> None
    assert product[("b", "y", 2, 33, "high", "under")] is None
def test_product_with_no_common_vars_table():
    """Check the product of two `Table`s that share no column name."""
    left = Table(sample_1, names=["X1", "X2", "X3", "X4"])
    right = Table(sample_2, names=["Y1", "Y2", "Y3", "Y4"])

    product = left * right
    expected_names = ["X1", "X2", "X3", "X4", "Y1", "Y2", "Y3", "Y4"]
    assert all(compare(product.names, expected_names))

    # values for two combined keys (left key followed by right key)
    assert product[("a", "x", 1, 33, 2, "high", "normal", "x")] == 10
    assert product[("b", "x", 1, 44, 1, "low", "over", "y")] == 253
def test_conditional_operator_discrete_distribution():
    """Check the `|` operator of `DiscreteDistribution`.

    Conditions a four-variable distribution on one name and on a tuple of
    two names, then verifies names, one frequency and one probability.
    """
    # Four-variable sample with counts 1..16.
    samples = {
        ("a", "x", 1, 33): 1, ("a", "x", 2, 33): 2,
        ("a", "x", 1, 44): 3, ("a", "x", 2, 44): 4,
        ("a", "y", 1, 33): 5, ("a", "y", 2, 33): 6,
        ("a", "y", 1, 44): 7, ("a", "y", 2, 44): 8,
        ("b", "x", 1, 33): 9, ("b", "x", 2, 33): 10,
        ("b", "x", 1, 44): 11, ("b", "x", 2, 44): 12,
        ("b", "y", 1, 33): 13, ("b", "y", 2, 33): 14,
        ("b", "y", 1, 44): 15, ("b", "y", 2, 44): 16,
    }
    joint = DiscreteDistribution(samples)

    # --- conditioned on X2 ---
    given_x2 = joint | "X2"
    assert all(compare(given_x2.conditional_rvs.names, ["X2"]))
    assert all(compare(given_x2.distributions["x"].names, ["X1", "X3", "X4"]))
    assert all(compare(given_x2.distributions["y"].names, ["X1", "X3", "X4"]))
    key = ("a", 1, 33)
    assert given_x2.frequency(key, "x") == 1
    assert given_x2.frequency(key, "y") == 5
    assert given_x2.probability(key, "x") == 1 / 52
    assert given_x2.probability(key, "y") == 5 / 84

    # --- conditioned on X2 and X3 ---
    given_x2_x3 = joint | ("X2", "X3")
    assert all(compare(given_x2_x3.conditional_rvs.names, ["X2", "X3"]))
    for condition in (("x", 1), ("x", 2), ("y", 1), ("y", 2)):
        assert all(
            compare(given_x2_x3.distributions[condition].names, ["X1", "X4"]))
    assert given_x2_x3.frequency(("a", 33), ("x", 1)) == 1
    assert given_x2_x3.probability(("a", 33), ("x", 1)) == 1 / 24
def test_product_with_one_common_var_table():
    """Check the product of two `Table`s sharing only the column X3."""
    left = Table(sample_1, names=["X1", "X2", "X3", "X4"])
    right = Table(sample_2, names=["X3", "X5", "X6", "X7"])

    product = left * right
    expected_names = ["X1", "X2", "X3", "X4", "X5", "X6", "X7"]
    assert all(compare(product.names, expected_names))

    # a combination present on both sides
    assert product["a", "x", 1, 33, "high", "normal", "x"] == 2
    # the right operand has no match for the shared value -> None
    assert product[("b", "y", 2, 44, "high", "over", "y")] is None
    # the left operand has no match for the shared value -> None
    assert product[("b", "y", 2, 33, "high", "normal", "y")] is None
def test_product_with_two_common_vars_discrete_distribution():
    """Check the product of two distributions sharing X2 and X3.

    Verifies names, total, and frequency/probability/item access for a
    matching key and for keys missing on either side (all zero).
    """
    left = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])
    right = DiscreteDistribution(sample_2, names=["X3", "X5", "X6", "X2"])

    product = left * right
    assert all(compare(product.names, ["X1", "X2", "X3", "X4", "X5", "X6"]))
    assert product.total == 24 * 36 + 28 * 100 + 40 * 164 + 30 * 51

    # a combination present on both sides
    matched = ("a", "y", 2, 33, "high", "under")
    assert product.frequency(matched) == 6 * 25
    assert product.probability(matched) == 150 / 11754
    assert product[matched] == 150

    # the right operand has no match for the shared values
    missing_right = ("a", "y", 2, 33, "low", "under")
    assert product.frequency(missing_right) == 0
    assert product.probability(missing_right) == 0
    assert product[missing_right] == 0

    # the left operand has no match for the shared values
    missing_left = ("b", "y", 2, 33, "high", "under")
    assert product.frequency(missing_left) == 0
    assert product.probability(missing_left) == 0
    assert product[missing_left] == 0
def test_product_with_no_common_vars_discrete_distribution():
    """Check the product of two distributions that share no name.

    Verifies names, that the total is the product of both totals, and
    frequency/probability/item access for a few combined keys.
    """
    left = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])
    right = DiscreteDistribution(sample_2, names=["Y1", "Y2", "Y3", "Y4"])

    product = left * right
    expected_names = ["X1", "X2", "X3", "X4", "Y1", "Y2", "Y3", "Y4"]
    assert all(compare(product.names, expected_names))
    assert product.total == (left.total * right.total)

    # note: the frequency check uses the "under" key, the next two "normal"
    assert product.frequency(("a", "x", 1, 33, 2, "high", "under", "x")) == 9
    normal_key = ("a", "x", 1, 33, 2, "high", "normal", "x")
    assert product.probability(normal_key) == 10 / 42822
    assert product[normal_key] == 10

    over_key = ("b", "x", 1, 44, 1, "low", "over", "y")
    assert product.frequency(over_key) == 253
    assert product.probability(over_key) == 253 / 42822
    assert product[over_key] == 253
def test_product_with_one_common_var_discrete_distribution():
    """Check the product of two distributions sharing only X3.

    Verifies names, total, and frequency/probability/item access for a
    matching key and for keys missing on either side (all zero).
    """
    left = DiscreteDistribution(sample_1, names=["X1", "X2", "X3", "X4"])
    right = DiscreteDistribution(sample_2, names=["X3", "X5", "X6", "X7"])

    product = left * right
    expected_names = ["X1", "X2", "X3", "X4", "X5", "X6", "X7"]
    assert all(compare(product.names, expected_names))
    assert product.total == (36 + 164) * 64 + (100 + 51) * 58

    # a combination present on both sides
    matched = ("a", "x", 1, 33, "high", "normal", "x")
    assert product.frequency(matched) == 2
    assert product.probability(matched) == 2 / 21558
    assert product[matched] == 2

    # the right operand has no match for the shared value
    missing_right = ("b", "y", 2, 44, "high", "over", "y")
    assert product.frequency(missing_right) == 0
    assert product.probability(missing_right) == 0
    assert product[missing_right] == 0

    # the left operand has no match for the shared value
    missing_left = ("b", "y", 2, 33, "high", "normal", "y")
    assert product.frequency(missing_left) == 0
    assert product.probability(missing_left) == 0
    assert product[missing_left] == 0
def test_reduce_by_name_table():
    """Check `Table.reduce(**names)` with default and with custom names.

    Fixing one, two or three columns must leave the remaining columns (in
    order) and the values of the matching rows.
    """
    # --- default names X1..X4 ---
    source = Table(samples)

    reduced = source.reduce(X2="y")
    assert reduced.columns.size == 3
    assert all(compare(reduced.names, ["X1", "X3", "X4"]))
    assert reduced[("a", 1, 33)] == 5
    assert reduced[("b", 2, 44)] == 16

    reduced = source.reduce(X2="y", X3=1)
    assert reduced.columns.size == 2
    assert all(compare(reduced.names, ["X1", "X4"]))
    assert reduced[("a", 33)] == 5
    assert reduced[("b", 44)] == 15

    reduced = source.reduce(X1="b", X3=1, X4=44)
    assert reduced.columns.size == 1
    assert all(compare(reduced.names, ["X2"]))
    assert reduced["x"] == 11
    assert reduced["y"] == 15

    # --- custom names Y, Z, W, X (same data) ---
    source = Table(samples, names=["Y", "Z", "W", "X"])

    reduced = source.reduce(Z="y")
    assert reduced.columns.size == 3
    assert all(compare(reduced.names, ["Y", "W", "X"]))
    assert reduced[("a", 1, 33)] == 5
    assert reduced[("b", 2, 44)] == 16

    reduced = source.reduce(Z="y", W=1)
    assert reduced.columns.size == 2
    assert all(compare(reduced.names, ["Y", "X"]))
    assert reduced[("a", 33)] == 5
    assert reduced[("b", 44)] == 15

    reduced = source.reduce(Y="b", W=1, X=44)
    assert reduced.columns.size == 1
    assert all(compare(reduced.names, ["Z"]))
    assert reduced["x"] == 11
    assert reduced["y"] == 15
def test_unauthorized_exception():
    """Check the string form of `UnauthorizedException`."""
    # NOTE(review): the result of `compare` is not asserted; presumably it
    # raises on mismatch -- confirm against the imported helper.
    error = UnauthorizedException(model_name='User')
    expected = u"{'message': 'Unauthorized.', 'model_name': 'User'}"
    compare(str(error), expected)
def test_model_invalid_exception():
    """Check the string form of `ModelInvalidException`."""
    # NOTE(review): the result of `compare` is not asserted; presumably it
    # raises on mismatch -- confirm against the imported helper.
    error = ModelInvalidException(model_name='User', errors=['Field missing'])
    expected = u"{'message': 'Model is invalid.', 'errors': ['Field missing'], 'model_name': 'User'}"
    compare(str(error), expected)
def test_model_not_found_exception():
    """Check the string form of `ModelNotFoundException`."""
    # NOTE(review): the result of `compare` is not asserted; presumably it
    # raises on mismatch -- confirm against the imported helper.
    error = ModelNotFoundException(model_name='User', model_id=1)
    expected = u"{'model_id': 1, 'message': 'Model not found.', 'model_name': 'User'}"
    compare(str(error), expected)
def test_marginals_names_discrete_distribution():
    """Check the names left after `DiscreteDistribution.marginal(...)`.

    Two- and three-variable samples are tried with default and with custom
    names; each case lists the names passed to `marginal` and the names
    expected on the result.
    """
    # --- two variables ---
    samples = {("a", "x"): 4, ("a", "y"): 4, ("b", "x"): 6, ("b", "y"): 6}

    disc_dist = DiscreteDistribution(samples)
    for dropped, remaining in [(("X1",), ["X2"]), (("X2",), ["X1"])]:
        assert all(compare(disc_dist.marginal(*dropped).names, remaining))

    disc_dist = DiscreteDistribution(samples, names=["Y", "Z"])
    for dropped, remaining in [(("Y",), ["Z"]), (("Z",), ["Y"])]:
        assert all(compare(disc_dist.marginal(*dropped).names, remaining))

    # --- three variables ---
    samples = {
        ("a", "x", 1): 4, ("a", "x", 2): 4,
        ("a", "y", 1): 6, ("a", "y", 2): 6,
        ("b", "x", 1): 8, ("b", "x", 2): 8,
        ("b", "y", 1): 10, ("b", "y", 2): 10,
    }

    disc_dist = DiscreteDistribution(samples)
    default_cases = [
        (("X1",), ["X2", "X3"]),
        (("X2",), ["X1", "X3"]),
        (("X3",), ["X1", "X2"]),
        (("X1", "X3"), ["X2"]),
        (("X2", "X3"), ["X1"]),
    ]
    for dropped, remaining in default_cases:
        assert all(compare(disc_dist.marginal(*dropped).names, remaining))

    disc_dist = DiscreteDistribution(samples, names=["Y", "Z", "W"])
    custom_cases = [
        (("Y",), ["Z", "W"]),
        (("Z",), ["Y", "W"]),
        (("W",), ["Y", "Z"]),
        (("Y", "W"), ["Z"]),
        (("Z", "W"), ["Y"]),
    ]
    for dropped, remaining in custom_cases:
        assert all(compare(disc_dist.marginal(*dropped).names, remaining))
def test_product_with_table_of_table_with_no_common_table():
    """Check products involving conditioned tables that yield no rows.

    The second operand always uses X1 values "one"/"two"/"three"
    (presumably absent from `sample_1` -- confirm), so every product is
    expected to be empty while still reporting the expected names and, for
    conditioned results, the expected children names.
    """

    def check_empty(product, names, children=None):
        # An empty product must still expose the expected column names.
        assert len(product) == 0
        assert all(compare(product.names, names))
        if children is not None:
            assert all(compare(product.columns.children_names, children))

    full = Table(sample_1, names=["X1", "X2", "X3", "X4"])

    # ----- second table over X1 only -----
    over_x1 = Table({"one": 1, "two": 2, "three": 3}, names=["X1"])
    cond = full.condition_on("X1")
    # P(X2, X3, X4 | X1) * P(X1) -> P(X2, X3, X4, X1)
    check_empty(cond * over_x1, ["X2", "X3", "X4", "X1"])
    # P(X1) * P(X2, X3, X4 | X1) -> P(X1, X2, X3, X4)
    check_empty(over_x1 * cond, ["X1", "X2", "X3", "X4"])

    # ----- second table over X1, X2 -----
    over_x1_x2 = Table(
        {("one", "x"): 1, ("two", "x"): 2, ("three", "y"): 3},
        names=["X1", "X2"],
    )
    cond = full.condition_on("X1", "X2")
    # P(X3, X4 | X1, X2) * P(X1, X2) -> P(X3, X4, X1, X2)
    check_empty(cond * over_x1_x2, ["X3", "X4", "X1", "X2"])
    # P(X1, X2) * P(X3, X4 | X1, X2) -> P(X1, X2, X3, X4)
    check_empty(over_x1_x2 * cond, ["X1", "X2", "X3", "X4"])

    other = over_x1_x2.condition_on("X1")
    # P(X3, X4 | X1, X2) * P(X2 | X1) -> P(X3, X4, X2 | X1)
    check_empty(cond * other, ["X1"], ["X3", "X4", "X2"])
    # P(X2 | X1) * P(X3, X4 | X1, X2) -> P(X2, X3, X4 | X1)
    check_empty(other * cond, ["X1"], ["X2", "X3", "X4"])

    # ----- second table over X1, X2, X3 -----
    over_x1_x2_x3 = Table(
        {("one", "x", 1): 1, ("two", "x", 1): 2, ("three", "y", 2): 3},
        names=["X1", "X2", "X3"],
    )
    cond = full.condition_on("X1", "X2", "X3")
    # P(X4 | X1, X2, X3) * P(X1, X2, X3) -> P(X4, X1, X2, X3)
    check_empty(cond * over_x1_x2_x3, ["X4", "X1", "X2", "X3"])
    # P(X1, X2, X3) * P(X4 | X1, X2, X3) -> P(X1, X2, X3, X4)
    check_empty(over_x1_x2_x3 * cond, ["X1", "X2", "X3", "X4"])

    other = over_x1_x2_x3.condition_on("X1")
    # P(X4 | X1, X2, X3) * P(X2, X3 | X1) -> P(X4, X2, X3 | X1)
    check_empty(cond * other, ["X1"], ["X4", "X2", "X3"])
    # P(X2, X3 | X1) * P(X4 | X1, X2, X3) -> P(X2, X3, X4 | X1)
    check_empty(other * cond, ["X1"], ["X2", "X3", "X4"])

    other = over_x1_x2_x3.condition_on("X1", "X2")
    # P(X4 | X1, X2, X3) * P(X3 | X1, X2) -> P(X4, X3 | X1, X2)
    check_empty(cond * other, ["X1", "X2"], ["X4", "X3"])
    # P(X3 | X1, X2) * P(X4 | X1, X2, X3) -> P(X3, X4 | X1, X2)
    check_empty(other * cond, ["X1", "X2"], ["X3", "X4"])

    other = over_x1_x2_x3.condition_on("X2", "X3")
    # P(X4 | X1, X2, X3) * P(X1 | X2, X3) -> P(X4, X1 | X2, X3)
    check_empty(cond * other, ["X2", "X3"], ["X4", "X1"])
    # P(X1 | X2, X3) * P(X4 | X1, X2, X3) -> P(X1, X4 | X2, X3)
    check_empty(other * cond, ["X2", "X3"], ["X1", "X4"])
def test_marginals_names_table():
    """Check the names left after `Table.marginal(...)`.

    Two- and three-column samples are tried with default and with custom
    names; each case lists the names passed to `marginal` and the names
    expected on the result.
    """
    # --- two columns ---
    samples = {("a", "x"): 4, ("a", "y"): 4, ("b", "x"): 6, ("b", "y"): 6}

    table = Table(samples)
    for dropped, remaining in [(("X1",), ["X2"]), (("X2",), ["X1"])]:
        assert all(compare(table.marginal(*dropped).names, remaining))

    table = Table(samples, names=["Y", "Z"])
    for dropped, remaining in [(("Y",), ["Z"]), (("Z",), ["Y"])]:
        assert all(compare(table.marginal(*dropped).names, remaining))

    # --- three columns ---
    samples = {
        ("a", "x", 1): 4, ("a", "x", 2): 4,
        ("a", "y", 1): 6, ("a", "y", 2): 6,
        ("b", "x", 1): 8, ("b", "x", 2): 8,
        ("b", "y", 1): 10, ("b", "y", 2): 10,
    }

    table = Table(samples)
    default_cases = [
        (("X1",), ["X2", "X3"]),
        (("X2",), ["X1", "X3"]),
        (("X3",), ["X1", "X2"]),
        (("X1", "X3"), ["X2"]),
        (("X2", "X3"), ["X1"]),
    ]
    for dropped, remaining in default_cases:
        assert all(compare(table.marginal(*dropped).names, remaining))

    table = Table(samples, names=["Y", "Z", "W"])
    custom_cases = [
        (("Y",), ["Z", "W"]),
        (("Z",), ["Y", "W"]),
        (("W",), ["Y", "Z"]),
        (("Y", "W"), ["Z"]),
        (("Z", "W"), ["Y"]),
    ]
    for dropped, remaining in custom_cases:
        assert all(compare(table.marginal(*dropped).names, remaining))
def test_marginal_if_table_of_table():
    """Check `marginal` on tables produced by `condition_on`.

    Marginalising a conditioning column, or every child column, must raise
    `ValueError`; otherwise the result keeps the conditioning names and the
    remaining children, with the expected values.
    """
    # Four-column sample; the row ("b", "y", 2, 33) is deliberately absent.
    sample_1 = {
        ("a", "x", 1, 33): 1, ("a", "x", 2, 33): 2,
        ("a", "x", 1, 44): 3, ("a", "x", 2, 44): 4,
        ("a", "y", 1, 33): 5, ("a", "y", 2, 33): 6,
        ("a", "y", 1, 44): 7, ("a", "y", 2, 44): 8,
        ("b", "x", 1, 33): 9, ("b", "x", 2, 33): 10,
        ("b", "x", 1, 44): 11, ("b", "x", 2, 44): 12,
        ("b", "y", 1, 33): 13,
        # ("b", "y", 2, 33): 14,
        ("b", "y", 1, 44): 15, ("b", "y", 2, 44): 16,
    }
    source = Table(sample_1, names=["X1", "X2", "X3", "X4"])

    # ----- conditioned on X1 (normalised) -----
    by_x1 = source.condition_on("X1")
    with pytest.raises(ValueError):
        by_x1.marginal("X1")

    result = by_x1.marginal("X2")
    assert all(compare(result.names, ["X1"]))
    assert all(compare(result.children_names, ["X3", "X4"]))
    # 36 and 86 are the totals of the "a" and "b" rows
    normalised_cases = [
        ("a", (1, 33), (1 + 5) / 36),
        ("a", (1, 44), (3 + 7) / 36),
        ("a", (2, 33), (2 + 6) / 36),
        ("a", (2, 44), (4 + 8) / 36),
        ("b", (1, 33), (9 + 13) / 86),
        ("b", (1, 44), (11 + 15) / 86),
    ]
    for outer, inner, expected in normalised_cases:
        assert result[outer][inner] == approx(expected)

    # ----- conditioned on X1 (raw counts) -----
    by_x1 = source.condition_on("X1", normalise=False)
    result = by_x1.marginal("X2", normalise=False)
    raw_cases = [
        ("a", (1, 33), (1 + 5)),
        ("a", (1, 44), (3 + 7)),
        ("a", (2, 33), (2 + 6)),
        ("a", (2, 44), (4 + 8)),
        ("b", (1, 33), (9 + 13)),
        ("b", (1, 44), (11 + 15)),
    ]
    for outer, inner, expected in raw_cases:
        assert result[outer][inner] == expected

    # two children marginalised at once (default normalisation)
    result = by_x1.marginal("X2", "X3")
    assert all(compare(result.names, ["X1"]))
    assert all(compare(result.children_names, ["X4"]))
    x4_cases = [
        ("a", 33, (1 + 5 + 2 + 6) / 36),
        ("a", 44, (3 + 7 + 4 + 8) / 36),
        ("b", 33, (9 + 10 + 13) / 86),
        ("b", 44, (11 + 12 + 15 + 16) / 86),
    ]
    for outer, inner, expected in x4_cases:
        assert result[outer][inner] == approx(expected)

    result = by_x1.marginal("X2", "X4")
    assert all(compare(result.names, ["X1"]))
    assert all(compare(result.children_names, ["X3"]))
    x3_cases = [
        ("a", 1, (1 + 3 + 5 + 7) / 36),
        ("a", 2, (2 + 4 + 6 + 8) / 36),
        ("b", 1, (9 + 11 + 13 + 15) / 86),
        ("b", 2, (10 + 12 + 16) / 86),
    ]
    for outer, inner, expected in x3_cases:
        assert result[outer][inner] == approx(expected)

    # ----- conditioned on X1 and X3 -----
    by_x1_x3 = source.condition_on("X1", "X3")
    for rejected in (("X1",), ("X3",), ("X2", "X4")):
        with pytest.raises(ValueError):
            by_x1_x3.marginal(*rejected)

    result = by_x1_x3.marginal("X2")
    assert all(compare(result.names, ["X1", "X3"]))
    assert all(compare(result.children_names, ["X4"]))
    pair_cases = [
        (("a", 1), 33, (1 + 5) / 16),
        (("a", 1), 44, (3 + 7) / 16),
        (("a", 2), 44, (4 + 8) / 20),
        (("b", 1), 33, (9 + 13) / 48),
        (("b", 2), 33, 10 / 38),
        (("b", 2), 44, (12 + 16) / 38),
    ]
    for outer, inner, expected in pair_cases:
        assert result[outer][inner] == approx(expected)

    # ----- conditioned on three columns: the only child cannot be dropped -----
    by_three = source.condition_on("X1", "X3", "X4")
    with pytest.raises(ValueError):
        by_three.marginal("X2")
def test_marginal_by_name_table():
    """Check `Table.marginal` by column name on a four-column table.

    Covers dropping one, two and three columns, the `normalise=False`
    variant, and repeated marginalisation of an already reduced table.
    """
    # Four-column sample with counts 1..16 (total 136).
    samples = {
        ("a", "x", 1, 33): 1, ("a", "x", 2, 33): 2,
        ("a", "x", 1, 44): 3, ("a", "x", 2, 44): 4,
        ("a", "y", 1, 33): 5, ("a", "y", 2, 33): 6,
        ("a", "y", 1, 44): 7, ("a", "y", 2, 44): 8,
        ("b", "x", 1, 33): 9, ("b", "x", 2, 33): 10,
        ("b", "x", 1, 44): 11, ("b", "x", 2, 44): 12,
        ("b", "y", 1, 33): 13, ("b", "y", 2, 33): 14,
        ("b", "y", 1, 44): 15, ("b", "y", 2, 44): 16,
    }
    table = Table(samples, names=["Age", "Sex", "Edu", "Etn"])

    # ----- drop "Edu" -----
    without_edu = {
        ("a", "x", 33): 3, ("a", "x", 44): 7,
        ("a", "y", 33): 11, ("a", "y", 44): 15,
        ("b", "x", 33): 19, ("b", "x", 44): 23,
        ("b", "y", 33): 27, ("b", "y", 44): 31,
    }
    result = table.marginal("Edu")
    assert all(compare(result.keys(), list(without_edu)))
    for key, count in without_edu.items():
        assert result[key] == count / 136

    # same marginal, raw counts
    result = table.marginal("Edu", normalise=False)
    for key, count in without_edu.items():
        assert result[key] == count

    # ----- drop "Etn" -----
    without_etn = {
        ("a", "x", 1): 4, ("a", "x", 2): 6,
        ("a", "y", 1): 12, ("a", "y", 2): 14,
        ("b", "x", 1): 20, ("b", "x", 2): 22,
        ("b", "y", 1): 28, ("b", "y", 2): 30,
    }
    result = table.marginal("Etn")
    assert all(compare(result.keys(), list(without_etn)))
    for key, count in without_etn.items():
        assert result[key] == count / 136

    # ----- drop "Age" and "Etn" -----
    sex_edu = {("x", 1): 24, ("x", 2): 28, ("y", 1): 40, ("y", 2): 44}
    result = table.marginal("Age", "Etn")
    assert all(compare(result.keys(), list(sex_edu)))
    for key, count in sex_edu.items():
        assert result[key] == count / 136

    # ----- drop three columns at once -----
    result = table.marginal("Age", "Sex", "Etn")
    assert all(compare(result.keys(), [(1, ), (2, )]))
    assert result[1] == 64 / 136
    assert result[2] == 72 / 136

    # ----- marginalise in two steps -----
    first_step = table.marginal("Age", "Etn")
    second_step = first_step.marginal("Sex")
    assert all(compare(second_step.keys(), [(1, ), (2, )]))
    assert second_step[1] == 64 / 136
    assert second_step[2] == 72 / 136

    # ----- marginalise in three steps -----
    first_step = table.marginal("Etn")
    second_step = first_step.marginal("Edu")
    third_step = second_step.marginal("Sex")
    assert all(compare(third_step.keys(), [("a", ), ("b", )]))
    assert third_step["a"] == 36 / 136
    assert third_step["b"] == 100 / 136
def test_marginals_table():
    """Marginalise Tables that use the default column names "X1", "X2", ...

    Covers the single-column error case and two-, three- and four-column
    tables, dropping one or several columns per call and chaining calls.
    """

    def check(marginal, expected, total):
        # keys() is compared against 1-tuples even when a single column is
        # left, whereas single-column lookups use the bare value.
        keys = [key if isinstance(key, tuple) else (key,) for key in expected]
        assert all(compare(marginal.keys(), keys))
        for key, freq in expected.items():
            assert marginal[key] == freq / total

    # Single RV dist.
    # The table is built outside the ``raises`` block so that only the
    # ``marginal`` call can satisfy the expected ValueError; a ValueError
    # raised by the constructor must fail the test instead of passing it.
    table = Table({"A": 2, "B": 3, "C": 4})
    with pytest.raises(ValueError):
        table.marginal("X1")

    # Two levels dist.
    table = Table({(1, 1): 4, (1, 2): 4, (2, 1): 6, (2, 2): 6})
    check(table.marginal("X1"), {1: 10, 2: 10}, 20)
    check(table.marginal("X2"), {1: 8, 2: 12}, 20)

    table = Table({("a", "x"): 4, ("a", "y"): 4, ("b", "x"): 6, ("b", "y"): 6})
    check(table.marginal("X1"), {"x": 10, "y": 10}, 20)
    # NOTE(review): identical to the check above; presumably it guards
    # against marginal() altering the source table -- confirm or drop.
    check(table.marginal("X1"), {"x": 10, "y": 10}, 20)
    check(table.marginal("X2"), {"a": 8, "b": 12}, 20)

    # Three levels dist.
    table = Table({
        ("a", "x", 1): 4,
        ("a", "x", 2): 4,
        ("a", "y", 1): 6,
        ("a", "y", 2): 6,
        ("b", "x", 1): 8,
        ("b", "x", 2): 8,
        ("b", "y", 1): 10,
        ("b", "y", 2): 10,
    })
    check(
        table.marginal("X1"),
        {("x", 1): 12, ("x", 2): 12, ("y", 1): 16, ("y", 2): 16},
        56,
    )
    check(
        table.marginal("X2"),
        {("a", 1): 10, ("a", 2): 10, ("b", 1): 18, ("b", 2): 18},
        56,
    )
    check(
        table.marginal("X3"),
        {("a", "x"): 8, ("a", "y"): 12, ("b", "x"): 16, ("b", "y"): 20},
        56,
    )
    check(table.marginal("X1", "X2"), {1: 28, 2: 28}, 56)
    check(table.marginal("X1", "X3"), {"x": 24, "y": 32}, 56)
    check(table.marginal("X2", "X3"), {"a": 20, "b": 36}, 56)

    # Four levels dist.: frequencies 1..16 (sum 136). The third column
    # alternates fastest, then the fourth, the second and the first.
    cells = [
        (x1, x2, x3, x4)
        for x1 in ("a", "b")
        for x2 in ("x", "y")
        for x4 in (33, 44)
        for x3 in (1, 2)
    ]
    table = Table({cell: freq for freq, cell in enumerate(cells, start=1)})

    check(
        table.marginal("X3"),
        {
            ("a", "x", 33): 3,
            ("a", "x", 44): 7,
            ("a", "y", 33): 11,
            ("a", "y", 44): 15,
            ("b", "x", 33): 19,
            ("b", "x", 44): 23,
            ("b", "y", 33): 27,
            ("b", "y", 44): 31,
        },
        136,
    )
    check(
        table.marginal("X4"),
        {
            ("a", "x", 1): 4,
            ("a", "x", 2): 6,
            ("a", "y", 1): 12,
            ("a", "y", 2): 14,
            ("b", "x", 1): 20,
            ("b", "x", 2): 22,
            ("b", "y", 1): 28,
            ("b", "y", 2): 30,
        },
        136,
    )
    check(
        table.marginal("X1", "X4"),
        {("x", 1): 24, ("x", 2): 28, ("y", 1): 40, ("y", 2): 44},
        136,
    )
    check(table.marginal("X1", "X2", "X4"), {1: 64, 2: 72}, 136)

    # marginalize two times
    check(table.marginal("X1", "X4").marginal("X2"), {1: 64, 2: 72}, 136)

    # marginalize three times
    check(
        table.marginal("X4").marginal("X3").marginal("X2"),
        {"a": 36, "b": 100},
        136,
    )
def test_marginal_by_name_discrete_distribution():
    """Marginalise a four-variable DiscreteDistribution by custom names.

    Every marginal must keep the original total, list the expected keys,
    and report both the summed frequencies and the matching probabilities.
    """
    # Four levels dist.: frequencies 1..16 (sum 136). The third variable
    # alternates fastest, then the fourth, the second and the first.
    cells = [
        (age, sex, edu, etn)
        for age in ("a", "b")
        for sex in ("x", "y")
        for etn in (33, 44)
        for edu in (1, 2)
    ]
    samples = {cell: freq for freq, cell in enumerate(cells, start=1)}
    disc_dist = DiscreteDistribution(samples,
                                     names=["Age", "Sex", "Edu", "Etn"])

    def check(marginal, expected):
        assert marginal.total == disc_dist.total
        assert all(compare(marginal.keys_as_list(), list(expected)))
        # All frequencies are verified before any probability.
        for key, freq in expected.items():
            assert marginal[key] == freq
        for key, freq in expected.items():
            assert marginal.probability(key) == freq / 136

    check(
        disc_dist.marginal("Edu"),
        {
            ("a", "x", 33): 3,
            ("a", "x", 44): 7,
            ("a", "y", 33): 11,
            ("a", "y", 44): 15,
            ("b", "x", 33): 19,
            ("b", "x", 44): 23,
            ("b", "y", 33): 27,
            ("b", "y", 44): 31,
        },
    )
    check(
        disc_dist.marginal("Etn"),
        {
            ("a", "x", 1): 4,
            ("a", "x", 2): 6,
            ("a", "y", 1): 12,
            ("a", "y", 2): 14,
            ("b", "x", 1): 20,
            ("b", "x", 2): 22,
            ("b", "y", 1): 28,
            ("b", "y", 2): 30,
        },
    )
    check(
        disc_dist.marginal("Age", "Etn"),
        {("x", 1): 24, ("x", 2): 28, ("y", 1): 40, ("y", 2): 44},
    )
    check(disc_dist.marginal("Age", "Sex", "Etn"), {1: 64, 2: 72})

    # marginalize two times
    check(disc_dist.marginal("Age", "Etn").marginal("Sex"), {1: 64, 2: 72})

    # marginalize three times
    check(
        disc_dist.marginal("Etn").marginal("Edu").marginal("Sex"),
        {"a": 36, "b": 100},
    )
def test_invalid_json_exception():
    """The exception's string form shows its message and the supplied data."""
    expected = (
        u"{'message': 'Mime-type is JSON, but no JSON object could be decoded.', "
        u"'data': {'some': 'value'}}"
    )
    error = InvalidJSONException(data={'some': 'value'})
    # NOTE(review): the result of compare() is not asserted here; this only
    # checks something if compare() raises on mismatch -- confirm.
    compare(str(error), expected)
def test_invalid_mimetype_exception():
    """The exception's string form shows the mimetype and its message."""
    expected = (
        u"{'mimetype': 'text/xml', "
        u"'message': 'Mime-type has to be application/json.'}"
    )
    error = InvalidMimetypeException(mimetype='text/xml')
    # NOTE(review): the result of compare() is not asserted here; this only
    # checks something if compare() raises on mismatch -- confirm.
    compare(str(error), expected)
def test_marginals_discrete_distribution():
    """Marginalise DiscreteDistributions that use default names "X1", ...

    Covers the single-variable error case and two-, three- and
    four-variable distributions. Every marginal must keep the source
    total, list the expected keys, and report both the summed frequencies
    and the matching probabilities.
    """

    def check(source, marginal, expected, total):
        assert marginal.total == source.total
        assert all(compare(marginal.keys_as_list(), list(expected)))
        # All frequencies are verified before any probability.
        for key, freq in expected.items():
            assert marginal[key] == freq
        for key, freq in expected.items():
            assert marginal.probability(key) == freq / total

    # Single RV dist.
    # The distribution is built outside the ``raises`` block so that only
    # the ``marginal`` call can satisfy the expected ValueError; a
    # ValueError raised by the constructor must fail the test instead.
    disc_dist = DiscreteDistribution({"A": 2, "B": 3, "C": 4})
    with pytest.raises(ValueError):
        disc_dist.marginal("X1")

    # Two levels dist.
    disc_dist = DiscreteDistribution(
        {(1, 1): 4, (1, 2): 4, (2, 1): 6, (2, 2): 6})
    check(disc_dist, disc_dist.marginal("X1"), {1: 10, 2: 10}, 20)
    check(disc_dist, disc_dist.marginal("X2"), {1: 8, 2: 12}, 20)

    disc_dist = DiscreteDistribution(
        {("a", "x"): 4, ("a", "y"): 4, ("b", "x"): 6, ("b", "y"): 6})
    check(disc_dist, disc_dist.marginal("X1"), {"x": 10, "y": 10}, 20)
    # NOTE(review): identical to the check above; presumably it guards
    # against marginal() altering the source distribution -- confirm or drop.
    check(disc_dist, disc_dist.marginal("X1"), {"x": 10, "y": 10}, 20)
    check(disc_dist, disc_dist.marginal("X2"), {"a": 8, "b": 12}, 20)

    # Three levels dist.
    disc_dist = DiscreteDistribution({
        ("a", "x", 1): 4,
        ("a", "x", 2): 4,
        ("a", "y", 1): 6,
        ("a", "y", 2): 6,
        ("b", "x", 1): 8,
        ("b", "x", 2): 8,
        ("b", "y", 1): 10,
        ("b", "y", 2): 10,
    })
    check(
        disc_dist,
        disc_dist.marginal("X1"),
        {("x", 1): 12, ("x", 2): 12, ("y", 1): 16, ("y", 2): 16},
        56,
    )
    check(
        disc_dist,
        disc_dist.marginal("X2"),
        {("a", 1): 10, ("a", 2): 10, ("b", 1): 18, ("b", 2): 18},
        56,
    )
    check(
        disc_dist,
        disc_dist.marginal("X3"),
        {("a", "x"): 8, ("a", "y"): 12, ("b", "x"): 16, ("b", "y"): 20},
        56,
    )
    check(disc_dist, disc_dist.marginal("X1", "X2"), {1: 28, 2: 28}, 56)
    check(disc_dist, disc_dist.marginal("X1", "X3"), {"x": 24, "y": 32}, 56)
    check(disc_dist, disc_dist.marginal("X2", "X3"), {"a": 20, "b": 36}, 56)

    # Four levels dist.: frequencies 1..16 (sum 136). The third variable
    # alternates fastest, then the fourth, the second and the first.
    cells = [
        (x1, x2, x3, x4)
        for x1 in ("a", "b")
        for x2 in ("x", "y")
        for x4 in (33, 44)
        for x3 in (1, 2)
    ]
    disc_dist = DiscreteDistribution(
        {cell: freq for freq, cell in enumerate(cells, start=1)})

    check(
        disc_dist,
        disc_dist.marginal("X3"),
        {
            ("a", "x", 33): 3,
            ("a", "x", 44): 7,
            ("a", "y", 33): 11,
            ("a", "y", 44): 15,
            ("b", "x", 33): 19,
            ("b", "x", 44): 23,
            ("b", "y", 33): 27,
            ("b", "y", 44): 31,
        },
        136,
    )
    check(
        disc_dist,
        disc_dist.marginal("X4"),
        {
            ("a", "x", 1): 4,
            ("a", "x", 2): 6,
            ("a", "y", 1): 12,
            ("a", "y", 2): 14,
            ("b", "x", 1): 20,
            ("b", "x", 2): 22,
            ("b", "y", 1): 28,
            ("b", "y", 2): 30,
        },
        136,
    )
    check(
        disc_dist,
        disc_dist.marginal("X1", "X4"),
        {("x", 1): 24, ("x", 2): 28, ("y", 1): 40, ("y", 2): 44},
        136,
    )
    check(disc_dist, disc_dist.marginal("X1", "X2", "X4"), {1: 64, 2: 72}, 136)

    # marginalize two times
    check(
        disc_dist,
        disc_dist.marginal("X1", "X4").marginal("X2"),
        {1: 64, 2: 72},
        136,
    )

    # marginalize three times
    check(
        disc_dist,
        disc_dist.marginal("X4").marginal("X3").marginal("X2"),
        {"a": 36, "b": 100},
        136,
    )
def test_reduce_by_name_discrete_distribution():
    """Reduce a four-variable DiscreteDistribution by fixing named variables.

    Runs the same three reductions once with the default names
    ("X1".."X4") and once with custom names ("Y", "Z", "W", "X").
    """

    def check(reduced, names, expected, total):
        assert reduced.rvs.size == len(names)
        assert all(compare(reduced.rvs.names, names))
        # Item access, frequency() and probability() are checked in turn.
        for key, freq in expected.items():
            assert reduced[key] == freq
        for key, freq in expected.items():
            assert reduced.frequency(key) == freq
        for key, freq in expected.items():
            assert reduced.probability(key) == freq / total

    # Frequencies 1..16; the third variable alternates fastest, then the
    # fourth, the second and the first.
    cells = [
        (first, second, third, fourth)
        for first in ("a", "b")
        for second in ("x", "y")
        for fourth in (33, 44)
        for third in (1, 2)
    ]
    samples = {cell: freq for freq, cell in enumerate(cells, start=1)}

    # Spot-checked cells and the total frequency left after each reduction.
    one_fixed = {("a", 1, 33): 5, ("b", 2, 44): 16}
    two_fixed = {("a", 33): 5, ("b", 44): 15}
    three_fixed = {"x": 11, "y": 15}

    # Default variable names.
    disc_dist = DiscreteDistribution(samples)
    check(disc_dist.reduce(X2="y"), ["X1", "X3", "X4"], one_fixed, 84)
    check(disc_dist.reduce(X2="y", X3=1), ["X1", "X4"], two_fixed, 40)
    check(disc_dist.reduce(X1="b", X3=1, X4=44), ["X2"], three_fixed, 26)

    # Custom variable names.
    disc_dist = DiscreteDistribution(samples, names=["Y", "Z", "W", "X"])
    check(disc_dist.reduce(Z="y"), ["Y", "W", "X"], one_fixed, 84)
    check(disc_dist.reduce(Z="y", W=1), ["Y", "X"], two_fixed, 40)
    check(disc_dist.reduce(Y="b", W=1, X=44), ["Z"], three_fixed, 26)
def test_three_levels_discrete_distribution():
    """Build three-variable distributions of growing size and inspect them.

    Each scenario checks the per-variable levels, the variable count, the
    stored frequencies, the normalised and raw frequency lists, and
    ``prob`` lookups by default variable name.
    """

    def check(samples, levels, probabilities, absent=(), impossible=()):
        # A fresh dict per scenario, as with the original dict literals.
        dist = DiscreteDistribution(dict(samples))
        for found, wanted in zip(dist.levels(), levels):
            assert all(compare(found, wanted))
        assert dist.rvs.size == 3
        for key, freq in samples.items():
            assert dist[key] == freq
        # Keys that were never sampled are expected to read as 0.
        for key in absent:
            assert dist[key] == 0
        assert all(compare(dist.frequencies(normalised=True), probabilities))
        assert all(
            compare(dist.frequencies(normalised=False),
                    list(samples.values())))
        for (x1, x2, x3), prob in zip(samples, probabilities):
            assert dist.prob(X1=x1, X2=x2, X3=x3) == prob
        # Combinations that were never sampled are expected to have prob 0.
        for x1, x2, x3 in impossible:
            assert dist.prob(X1=x1, X2=x2, X3=x3) == 0

    check(
        {("A", "y", 1): 2},
        [["A"], ["y"], [1]],
        [1],
        absent=[("A", "x", 2)],
        impossible=[("A", "y", 2)],
    )
    check(
        {("A", "x", 1): 2, ("A", "y", 1): 2},
        [["A"], ["x", "y"], [1]],
        [0.5, 0.5],
        impossible=[("A", "y", 2)],
    )
    check(
        {("A", "x", 1): 2, ("B", "y", 2): 2},
        [["A", "B"], ["x", "y"], [1, 2]],
        [0.5, 0.5],
        absent=[("B", "y", 3)],
    )
    check(
        {("A", "x", 1): 1, ("A", "y", 2): 2, ("B", "x", 1): 3},
        [["A", "B"], ["x", "y"], [1, 2]],
        [1 / 6, 2 / 6, 3 / 6],
        absent=["B"],
        impossible=[("B", "y", 2)],
    )
    check(
        {
            ("A", "x", 1): 1,
            ("A", "y", 2): 2,
            ("B", "x", 1): 3,
            ("B", "y", 2): 4,
        },
        [["A", "B"], ["x", "y"], [1, 2]],
        [0.1, 0.2, 0.3, 0.4],
    )
    check(
        {
            ("A", "x", 1): 1,
            ("A", "y", 2): 2,
            ("B", "x", 1): 3,
            ("B", "y", 2): 4,
            ("C", "y", 3): 5,
        },
        [["A", "B", "C"], ["x", "y"], [1, 2, 3]],
        [1 / 15, 2 / 15, 3 / 15, 4 / 15, 5 / 15],
    )
    check(
        {
            ("A", "x", 1): 1,
            ("A", "y", 2): 2,
            ("B", "x", 3): 3,
            ("B", "y", 3): 4,
            ("C", "z", 4): 5,
        },
        [["A", "B", "C"], ["x", "y", "z"], [1, 2, 3, 4]],
        [1 / 15, 2 / 15, 3 / 15, 4 / 15, 5 / 15],
    )