def test__example_1_3(self):
    """Check p(butler='yes' | knife_used='yes') to three places."""
    butler_marginal = Discrete.from_probs(
        data={'yes': 0.6, 'no': 0.4}, variables='butler')
    maid_marginal = Discrete.from_probs(
        data={'yes': 0.2, 'no': 0.8}, variables='maid')
    suspects = butler_marginal * maid_marginal

    # Keys are presumably ordered (knife_used, butler, maid) -- the joint
    # variable followed by the conditional variables; confirm against
    # Conditional.from_probs.
    knife_table = {
        ('yes', 'no', 'no'): 0.3,
        ('yes', 'no', 'yes'): 0.2,
        ('yes', 'yes', 'no'): 0.6,
        ('yes', 'yes', 'yes'): 0.1,
        ('no', 'no', 'no'): 0.7,
        ('no', 'no', 'yes'): 0.8,
        ('no', 'yes', 'no'): 0.4,
        ('no', 'yes', 'yes'): 0.9,
    }
    knife__given__suspects = Conditional.from_probs(
        data=knife_table,
        joint_variables='knife_used',
        conditional_variables=['butler', 'maid'])

    full_joint = knife__given__suspects * suspects
    knife_was_used = full_joint.given(knife_used='yes')
    self.assertAlmostEqual(0.728, knife_was_used.p(butler='yes'), 3)
def test__example_1_2(self):
    """Compute has_kj given eats_hbs for two different eats_hbs marginals.

    The posterior is formed as ``eats_hbs__given__has_kj * has_kj /
    eats_hbs`` and queried at ``has_kj='yes', eats_hbs='yes'``; the
    expected values are 1.8e-5 (marginal 0.5) and 9 / 1000 (marginal
    0.001).
    """
    has_kj = Discrete.from_probs(data={
        'yes': 1e-5,
        'no': 1 - 1e-5
    }, variables='has_kj')
    # These two check that the supplied probabilities are read back
    # unchanged, so exact equality is kept on purpose.
    self.assertEqual(1e-5, has_kj.p(has_kj='yes'))
    self.assertEqual(1 - 1e-5, has_kj.p(has_kj='no'))

    eats_hbs__given__has_kj = Conditional.from_probs(
        data={
            ('yes', 'yes'): 0.9,
            ('no', 'yes'): 0.1
        },
        joint_variables='eats_hbs',
        conditional_variables='has_kj')

    # 1
    eats_hbs = Discrete.from_probs(data={
        'yes': 0.5,
        'no': 0.5
    }, variables='eats_hbs')
    has_kj__given__eats_hbs = eats_hbs__given__has_kj * has_kj / eats_hbs
    # The result goes through a float multiply and divide, so compare with
    # a tight tolerance rather than bit-for-bit equality.
    self.assertAlmostEqual(
        1.8e-5,
        has_kj__given__eats_hbs.p(has_kj='yes', eats_hbs='yes'),
        places=12)

    # 2
    eats_hbs = Discrete.from_probs(data={
        'yes': 0.001,
        'no': 0.999
    }, variables='eats_hbs')
    has_kj__given__eats_hbs = eats_hbs__given__has_kj * has_kj / eats_hbs
    self.assertAlmostEqual(
        9 / 1000,
        has_kj__given__eats_hbs.p(has_kj='yes', eats_hbs='yes'),
        places=12)
def setUp(self) -> None:
    """Build the gender/education counts fixture and the two-coin fixture."""
    education_counts = {
        ('Male', 'Never finished high school'): 112,
        ('Male', 'High school'): 231,
        ('Male', 'College'): 595,
        ('Male', 'Graduate school'): 242,
        ('Female', 'Never finished high school'): 136,
        ('Female', 'High school'): 189,
        ('Female', 'College'): 763,
        ('Female', 'Graduate school'): 172,
    }
    # Totals are derived from the same table that feeds the distribution.
    grand_total = sum(education_counts.values())
    high_school_total = sum(
        count for (_, level), count in education_counts.items()
        if level == 'High school')

    self.education = Discrete.from_counts(
        data=education_counts,
        variables=['gender', 'highest_education'])
    self.education__total = grand_total
    self.total__high_school = high_school_total

    coin_probs = {
        ('H', 'H', 1, 1): 0.25,
        ('H', 'T', 1, 0): 0.25,
        ('T', 'H', 1, 0): 0.25,
        ('T', 'T', 0, 1): 0.25
    }
    self.coin_dist = Discrete.from_probs(
        data=coin_probs,
        variables=['coin_1', 'coin_2', 'x', 'y'])
def test_mode_nd_categorical(self):
    """mode() of a two-variable distribution equals a one-row DataFrame.

    The expected row is the pair with the largest count, ('e', 'f').
    """
    pair_counts = Series({('a', 'b'): 1, ('c', 'd'): 2, ('e', 'f'): 3})
    dist = Discrete.from_counts(pair_counts, variables=['ace', 'bdf'])
    wanted = DataFrame([{'ace': 'e', 'bdf': 'f'}])
    observed = dist.mode()
    self.assertTrue(wanted.equals(observed))
def setUp(self) -> None:
    """Create self.darts: regions 1..20, each with probability 1 / 20."""
    regions = range(1, 21)
    uniform_probs = Series(
        index=Index(data=regions, name='region'),
        data=1 / 20)
    self.darts = Discrete(
        data=uniform_probs,
        variables='region',
        states=list(regions))
def test_mode_nd_multi_value(self):
    """mode() returns one row per tied pair when the top count is shared."""
    pair_counts = Series({('a', 'b'): 1, ('c', 'd'): 3, ('e', 'f'): 3})
    # ('c', 'd') and ('e', 'f') both have the highest count of 3.
    wanted = DataFrame({'ace': ['c', 'e'], 'bdf': ['d', 'f']})
    dist = Discrete.from_counts(pair_counts, variables=['ace', 'bdf'])
    observed = dist.mode()
    self.assertTrue(wanted.equals(observed))
def test_max_numeric(self):
    """max() is 2000 when it has probability 0.1 and 1000 when it has 0."""
    cases = [
        ({0: 0.7, 1000: 0.2, 2000: 0.1}, 2000),
        # 2000 is listed but carries zero probability here.
        ({0: 0.7, 1000: 0.3, 2000: 0}, 1000),
    ]
    for probs, largest in cases:
        dist = Discrete.from_probs(data=probs, variables='a')
        self.assertAlmostEqual(largest, dist.max())
def test__example_1_7(self):
    """Check p(A=1 | C=0) to four places for the binary A, B, C model."""
    c_table = {
        (0, 0): 0.1,
        (0, 1): 0.99,
        (1, 0): 0.8,
        (1, 1): 0.25,
    }
    c__given__a__and__b = Conditional.binary_from_probs(
        data=c_table,
        joint_variable='C',
        conditional_variables=['A', 'B'])
    prior_a = Discrete.binary(0.65, 'A')
    prior_b = Discrete.binary(0.77, 'B')
    joint_ab = prior_a * prior_b
    joint_abc = joint_ab * c__given__a__and__b
    posterior_a = joint_abc.given(C=0).p(A=1)
    self.assertAlmostEqual(0.8436, posterior_a, 4)
def test__example_1_4(self):
    """p(bob=False | alice=True, occupied=True) must be exactly 1."""
    occupied_table = {
        (True, False, False): 1,
        (True, False, True): 1,
        (True, True, False): 1,
        (True, True, True): 0,
    }
    occupied__given__alice__and__bob = Conditional.from_probs(
        occupied_table,
        joint_variables='occupied',
        conditional_variables=['alice', 'bob'])

    room_table = {
        (False, False): 0.25,
        (False, True): 0.25,
        (True, False): 0.25,
        (True, True): 0.25,
    }
    alice__and__bob = Discrete.from_probs(
        room_table, variables=['alice', 'bob'])

    full_joint = occupied__given__alice__and__bob * alice__and__bob
    evidence = full_joint.given(alice=True, occupied=True)
    self.assertEqual(1, evidence.p(bob=False))
def test_from_counts__1_var__vars_as_arg(self):
    """from_counts names the variable from the `variables` argument."""
    raw_counts = Series({'a': 1, 'b': 2, 'c': 3})
    dist = Discrete.from_counts(raw_counts, variables='abc')
    self.assertEqual(['abc'], dist.variables)
    self.assertEqual({'abc': ['a', 'b', 'c']}, dist.states)
    # Each probability is the state's count over the total of 6.
    for state, count in (('a', 1), ('b', 2), ('c', 3)):
        self.assertEqual(count / 6, dist.p(abc=state))
def test_max_categorical(self):
    """max() on string-valued states raises TypeError."""
    letter_probs = {'a': 0.7, 'b': 0.2, 'c': 0.1}
    dist = Discrete.from_probs(data=letter_probs, variables='x')
    with self.assertRaises(TypeError):
        dist.max()
def test_mode_1d_numeric(self):
    """mode() of a single numeric variable is the most probable state."""
    value_probs = {0: 0.7, 1000: 0.2, 2000: 0.1}
    dist = Discrete.from_probs(data=value_probs, variables='a')
    most_likely = dist.mode()
    self.assertEqual(0, most_likely)
def setUp(self) -> None:
    """Prepare the cookie/bowl fixtures and the m & m's colour mixes."""
    # cookies
    self.bowl_1_and_chocolate = 0.125
    self.bowl_1_and_vanilla = 0.375
    self.bowl_2_and_chocolate = 0.25
    self.bowl_2_and_vanilla = 0.25
    observations = TestChapter01.make_cookies_observations()
    self.cookies = Discrete.from_observations(observations)
    self.vanilla = self.cookies.p(flavor='vanilla')
    from_bowl_1 = self.cookies.given(bowl='bowl 1')
    self.vanilla__bowl_1 = from_bowl_1.p(flavor='vanilla')
    from_bowl_2 = self.cookies.given(bowl='bowl 2')
    self.vanilla__bowl_2 = from_bowl_2.p(flavor='vanilla')
    bowl_probs = {'bowl 1': 0.5, 'bowl 2': 0.5}
    self.bowl = Discrete.from_probs(bowl_probs, variables=['bowl'])
    self.bowl_1 = self.bowl.p(bowl='bowl 1')
    self.bowl_2 = self.bowl.p(bowl='bowl 2')

    # m & m's
    colors_1994 = {
        'brown': 0.3,
        'yellow': 0.2,
        'red': 0.2,
        'green': 0.1,
        'orange': 0.1,
        'tan': 0.1
    }
    self.mix_1994 = Discrete.from_probs(colors_1994, variables='color')
    colors_1996 = {
        'blue': 0.24,
        'green': 0.2,
        'orange': 0.16,
        'yellow': 0.14,
        'red': 0.13,
        'brown': 0.13
    }
    self.mix_1996 = Discrete.from_probs(colors_1996, variables='color')
    self.bag = Discrete.from_probs({1994: 0.5, 1996: 0.5}, variables='bag')
def test_from_counts__1_var__vars_on_index(self):
    """from_counts takes the variable name from the Series index name."""
    raw_counts = Series({'a': 1, 'b': 2, 'c': 3})
    raw_counts.index.name = 'abc'
    dist = Discrete.from_counts(raw_counts)
    self.assertEqual(['abc'], dist.variables)
    self.assertEqual({'abc': ['a', 'b', 'c']}, dist.states)
    # Each probability is the state's count over the total of 6.
    for state, count in (('a', 1), ('b', 2), ('c', 3)):
        self.assertEqual(count / 6, dist.p(abc=state))
def test_mean_numeric(self):
    """mean() equals the probability-weighted sum of the numeric states."""
    value_probs = {0: 0.7, 1000: 0.2, 2000: 0.1}
    dist = Discrete.from_probs(data=value_probs, variables='a')
    # Accumulated in the same left-to-right order as the dict literal.
    weighted_sum = sum(
        value * prob for value, prob in ((0, 0.7), (1000, 0.2), (2000, 0.1)))
    self.assertAlmostEqual(weighted_sum, dist.mean())
def test_from_probs__with_dict(self):
    """from_probs accepts a plain dict and returns a Discrete instance."""
    bowl_probs = {'bowl 1': 0.5, 'bowl 2': 0.5}
    bowl = Discrete.from_probs(bowl_probs, variables=['bowl'])
    self.assertIsInstance(bowl, Discrete)

    color_probs = {
        'brown': 0.3,
        'yellow': 0.2,
        'red': 0.2,
        'green': 0.1,
        'orange': 0.1,
        'tan': 0.1
    }
    mix_1994 = Discrete.from_probs(color_probs, variables='color')
    self.assertIsInstance(mix_1994, Discrete)
def test_from_observations__1_var(self):
    """from_observations on one column uses the column name as variable."""
    frame = DataFrame({
        'ace': ['a', 'c', 'c', 'e', 'e', 'e'],
    })
    dist = Discrete.from_observations(frame)
    self.assertEqual(['ace'], dist.variables)
    self.assertEqual({'ace': ['a', 'c', 'e']}, dist.states)
    # 'a', 'c' and 'e' occur 1, 2 and 3 times out of 6 rows.
    for state, occurrences in (('a', 1), ('c', 2), ('e', 3)):
        self.assertEqual(occurrences / 6, dist.p(ace=state))
def test_from_counts__2_vars__states_as_arg(self):
    """from_counts keeps explicitly supplied states, including unseen ones."""
    pair_counts = Series({('a', 'b'): 1, ('c', 'd'): 2, ('e', 'f'): 3})
    pair_counts.index.names = ['ace', 'bdf']
    # 'g' and 'h' never occur in the counts.
    states = {'ace': ['a', 'c', 'e', 'g'], 'bdf': ['b', 'd', 'f', 'h']}
    dist = Discrete.from_counts(pair_counts, states=states)
    self.assertEqual(['ace', 'bdf'], dist.variables)
    self.assertEqual(states, dist.states)
    for ace, bdf, count in (('a', 'b', 1), ('c', 'd', 2), ('e', 'f', 3)):
        self.assertEqual(count / 6, dist.p(ace=ace, bdf=bdf))
def test_from_observations__1_var__replace_vars(self):
    """The `variables` argument overrides the observed column name."""
    frame = DataFrame({
        'ace': ['a', 'c', 'c', 'e', 'e', 'e'],
    })
    dist = Discrete.from_observations(frame, variables='ACE')
    self.assertEqual(['ACE'], dist.variables)
    self.assertEqual({'ACE': ['a', 'c', 'e']}, dist.states)
    # 'a', 'c' and 'e' occur 1, 2 and 3 times out of 6 rows.
    for state, occurrences in (('a', 1), ('c', 2), ('e', 3)):
        self.assertEqual(occurrences / 6, dist.p(ACE=state))
def test__1_3_1(self):
    """Check p(s_a, s_b | t=9) for every pair of values 1..6.

    ``t`` is the sum ``s_a + s_b``. The four pairs that sum to 9 must each
    have probability 0.25 and every other pair must have probability 0.
    """
    faces = range(1, 7)
    rolls = list(product(faces, faces))

    t = Discrete.from_observations(data=DataFrame({
        't': [s_a + s_b for s_a, s_b in rolls]
    }))
    s_a__s_b = Discrete.from_probs(data={
        (a, b): 1 / 36 for a, b in rolls
    }, variables=['s_a', 's_b'])
    # Indicator table: 1 where the pair sums to 9, otherwise 0.
    t_9__given__s_a__s_b = Conditional.from_probs(
        data={(9, a, b): int(a + b == 9) for a, b in rolls},
        joint_variables=['t'],
        conditional_variables=['s_a', 's_b'])
    t_9__s_a__s_b = t_9__given__s_a__s_b * s_a__s_b
    t_9 = t_9__s_a__s_b / t.p(t=9)

    # Verify all 36 pairs. Stopping at 5 would skip (3, 6) and (6, 3),
    # which are two of the four pairs that sum to 9.
    for s_a, s_b in rolls:
        expected = 0.25 if s_a + s_b == 9 else 0
        # The value comes from a float division, so allow rounding noise.
        self.assertAlmostEqual(expected, t_9.p(s_a=s_a, s_b=s_b))
def test__1_1_2(self):
    """Combine language-given-country with country populations.

    Checks the country marginals, the structure of the Conditional built
    from a DataFrame, and every cell of the resulting joint to three
    places.
    """
    head_counts = Series({
        'England': 60_776_238,
        'Scotland': 5_116_900,
        'Wales': 2_980_700
    })
    head_counts.index.name = 'country'
    populations = Discrete.from_counts(data=head_counts)
    for share, country in (
        (0.882, 'England'),
        (0.074, 'Scotland'),
        (0.043, 'Wales'),
    ):
        self.assertAlmostEqual(share, populations.p(country=country), 3)

    # One column per country, one row per language.
    per_country = {
        'England': {
            'English': 0.95,
            'Scottish': 0.04,
            'Welsh': 0.01
        },
        'Scotland': {
            'English': 0.7,
            'Scottish': 0.3,
            'Welsh': 0.0
        },
        'Wales': {
            'English': 0.6,
            'Scottish': 0.0,
            'Welsh': 0.4
        }
    }
    language_probs = DataFrame.from_dict(per_country, orient='columns')
    language_probs.index.name = 'language'
    language_probs.columns.name = 'country'
    language__given__country = Conditional(data=language_probs)
    self.assertIsInstance(language__given__country.data, DataFrame)
    self.assertListEqual(['language'],
                         language__given__country.joint_variables)
    self.assertListEqual(['country'],
                         language__given__country.conditional_variables)

    country__language = language__given__country * populations
    expected_cells = [
        (0.838, 'England', 'English'),
        (0.035, 'England', 'Scottish'),
        (0.009, 'England', 'Welsh'),
        (0.052, 'Scotland', 'English'),
        (0.022, 'Scotland', 'Scottish'),
        (0.0, 'Scotland', 'Welsh'),
        (0.026, 'Wales', 'English'),
        (0.0, 'Wales', 'Scottish'),
        (0.017, 'Wales', 'Welsh'),
    ]
    for prob, country, language in expected_cells:
        cell = country__language.p(country=country, language=language)
        self.assertAlmostEqual(prob, cell, 3)
def test_from_counts__2_vars__vars_as_arg(self):
    """from_counts names both variables from the `variables` argument."""
    pair_counts = Series({('a', 'b'): 1, ('c', 'd'): 2, ('e', 'f'): 3})
    dist = Discrete.from_counts(pair_counts, variables=['ace', 'bdf'])
    self.assertEqual(['ace', 'bdf'], dist.variables)
    wanted_states = {
        'ace': ['a', 'c', 'e'],
        'bdf': ['b', 'd', 'f']
    }
    self.assertEqual(wanted_states, dist.states)
    for ace, bdf, count in (('a', 'b', 1), ('c', 'd', 2), ('e', 'f', 3)):
        self.assertEqual(count / 6, dist.p(ace=ace, bdf=bdf))
def test_from_observations__2_vars__extra_states(self):
    """from_observations keeps supplied states that were never observed."""
    frame = DataFrame({
        'ace': ['a', 'c', 'c', 'e', 'e', 'e'],
        'bdf': ['b', 'd', 'd', 'f', 'f', 'f']
    })
    # 'g' and 'h' do not appear in the observations.
    states = {'ace': ['a', 'c', 'e', 'g'], 'bdf': ['b', 'd', 'f', 'h']}
    dist = Discrete.from_observations(frame, states=states)
    self.assertEqual(['ace', 'bdf'], dist.variables)
    self.assertEqual(states, dist.states)
    for ace, bdf, rows in (('a', 'b', 1), ('c', 'd', 2), ('e', 'f', 3)):
        self.assertEqual(rows / 6, dist.p(ace=ace, bdf=bdf))
def test_given_all_variables(self):
    """Fixing A=1 and B=1 gives the same data as Discrete.binary(0, ...)."""
    wanted = Discrete.binary(0, 'A_xor_B').data
    xor_table = {
        (0, 0): 0,
        (0, 1): 1,
        (1, 0): 1,
        (1, 1): 0,
    }
    xor = Conditional.binary_from_probs(
        data=xor_table,
        joint_variable='A_xor_B',
        conditional_variables=['A', 'B'])
    observed = xor.given(A=1, B=1).data
    self.assertTrue(wanted.equals(observed))
def test_from_observations__2_vars__replace_vars(self):
    """The `variables` argument renames both observed columns."""
    frame = DataFrame({
        'ace': ['a', 'c', 'c', 'e', 'e', 'e'],
        'bdf': ['b', 'd', 'd', 'f', 'f', 'f']
    })
    dist = Discrete.from_observations(frame, variables=['ACE', 'BDF'])
    self.assertEqual(['ACE', 'BDF'], dist.variables)
    wanted_states = {
        'ACE': ['a', 'c', 'e'],
        'BDF': ['b', 'd', 'f']
    }
    self.assertEqual(wanted_states, dist.states)
    for ace, bdf, rows in (('a', 'b', 1), ('c', 'd', 2), ('e', 'f', 3)):
        self.assertEqual(rows / 6, dist.p(ACE=ace, BDF=bdf))
def test_mean_categorical(self):
    """mean() on string-valued states raises TypeError."""
    letter_counts = Series({'a': 1, 'c': 2, 'e': 3})
    dist = Discrete.from_counts(letter_counts, variables='x')
    with self.assertRaises(TypeError):
        dist.mean()
def test_mode_1d_categorical(self):
    """mode() of a single categorical variable is the highest-count state."""
    letter_counts = Series({'a': 1, 'c': 2, 'e': 3})
    dist = Discrete.from_counts(letter_counts, variables='x')
    most_common = dist.mode()
    self.assertEqual('e', most_common)
def test_mode_1d_multi_value(self):
    """mode() returns a list of every state tied for the highest count."""
    # 'a' and 'b' share the top count of 2.
    letter_counts = Series({'a': 2, 'b': 2, 'c': 1})
    dist = Discrete.from_counts(letter_counts, variables='x')
    tied_states = dist.mode()
    self.assertListEqual(['a', 'b'], tied_states)