예제 #1
0
    def test__example_1_3(self):
        """Check P(butler='yes' | knife_used='yes') is about 0.728.

        The joint over butler, maid and knife_used is built as the product of
        the two marginals and the knife conditional, and is then conditioned
        on the knife having been used.
        """
        p_butler = Discrete.from_probs(data={'yes': 0.6, 'no': 0.4},
                                       variables='butler')
        p_maid = Discrete.from_probs(data={'yes': 0.2, 'no': 0.8},
                                     variables='maid')
        suspects = p_butler * p_maid

        # Keys are presumably ordered (knife_used, butler, maid), i.e. joint
        # variable first, then the conditional variables -- confirm against
        # Conditional.from_probs.
        knife_table = {
            ('yes', 'no', 'no'): 0.3,
            ('yes', 'no', 'yes'): 0.2,
            ('yes', 'yes', 'no'): 0.6,
            ('yes', 'yes', 'yes'): 0.1,
            ('no', 'no', 'no'): 0.7,
            ('no', 'no', 'yes'): 0.8,
            ('no', 'yes', 'no'): 0.4,
            ('no', 'yes', 'yes'): 0.9,
        }
        knife__given__suspects = Conditional.from_probs(
            data=knife_table,
            joint_variables='knife_used',
            conditional_variables=['butler', 'maid'])

        full_joint = knife__given__suspects * suspects
        knife_was_used = full_joint.given(knife_used='yes')
        posterior = knife_was_used.p(butler='yes')
        self.assertAlmostEqual(0.728, posterior, 3)
예제 #2
0
    def test__example_1_2(self):
        """Check P(has_kj | eats_hbs) for two different hamburger priors.

        The posterior is computed as
        ``eats_hbs__given__has_kj * has_kj / eats_hbs`` and evaluated at
        ``has_kj='yes', eats_hbs='yes'``:

        1. P(eats_hbs='yes') = 0.5   -> expected 1.8e-5
        2. P(eats_hbs='yes') = 0.001 -> expected 9 / 1000
        """
        has_kj = Discrete.from_probs(data={
            'yes': 1e-5,
            'no': 1 - 1e-5
        },
                                     variables='has_kj')
        self.assertEqual(1e-5, has_kj.p(has_kj='yes'))
        self.assertEqual(1 - 1e-5, has_kj.p(has_kj='no'))
        eats_hbs__given__has_kj = Conditional.from_probs(
            data={
                ('yes', 'yes'): 0.9,
                ('no', 'yes'): 0.1
            },
            joint_variables='eats_hbs',
            conditional_variables='has_kj')

        # (P(eats_hbs='yes'), P(eats_hbs='no'), expected posterior)
        scenarios = (
            (0.5, 0.5, 1.8e-5),
            (0.001, 0.999, 9 / 1000),
        )
        for p_yes, p_no, expected in scenarios:
            eats_hbs = Discrete.from_probs(data={
                'yes': p_yes,
                'no': p_no
            },
                                           variables='eats_hbs')
            has_kj__given__eats_hbs = (eats_hbs__given__has_kj * has_kj /
                                       eats_hbs)
            # The posterior comes out of a float multiply/divide chain, so it
            # is compared with a tight tolerance rather than exact equality,
            # which depends on rounding in the last bit.
            self.assertAlmostEqual(
                expected,
                has_kj__given__eats_hbs.p(has_kj='yes', eats_hbs='yes'),
                places=12)
예제 #3
0
    def setUp(self) -> None:
        """Create the education-by-gender and two-coin fixtures.

        Sets ``self.education`` (built from raw counts),
        ``self.education__total`` (sum of every count),
        ``self.total__high_school`` (sum of the two 'High school' counts) and
        ``self.coin_dist`` (four equally likely outcomes).
        """
        education_counts = {
            ('Male', 'Never finished high school'): 112,
            ('Male', 'High school'): 231,
            ('Male', 'College'): 595,
            ('Male', 'Graduate school'): 242,
            ('Female', 'Never finished high school'): 136,
            ('Female', 'High school'): 189,
            ('Female', 'College'): 763,
            ('Female', 'Graduate school'): 172,
        }
        # Totals are derived from the same table that feeds the distribution.
        grand_total = sum(education_counts.values())
        high_school_total = sum(
            count for (_, level), count in education_counts.items()
            if level == 'High school')

        self.education = Discrete.from_counts(
            data=education_counts,
            variables=['gender', 'highest_education'])
        self.education__total = grand_total
        self.total__high_school = high_school_total

        coin_outcomes = [
            ('H', 'H', 1, 1),
            ('H', 'T', 1, 0),
            ('T', 'H', 1, 0),
            ('T', 'T', 0, 1),
        ]
        self.coin_dist = Discrete.from_probs(
            data={outcome: 0.25 for outcome in coin_outcomes},
            variables=['coin_1', 'coin_2', 'x', 'y'])
예제 #4
0
    def test_mode_nd_categorical(self):
        """mode() of a 2-variable distribution is a one-row DataFrame."""
        pair_counts = Series({('a', 'b'): 1, ('c', 'd'): 2, ('e', 'f'): 3})
        distribution = Discrete.from_counts(pair_counts,
                                            variables=['ace', 'bdf'])
        # ('e', 'f') carries the largest count.
        expected_mode = DataFrame([{'ace': 'e', 'bdf': 'f'}])
        self.assertTrue(expected_mode.equals(distribution.mode()))
예제 #5
0
    def setUp(self) -> None:
        """Create ``self.darts``: probability 1/20 on each region 1..20."""
        regions = range(1, 21)
        uniform_probs = Series(data=1 / 20,
                               index=Index(data=regions, name='region'))
        self.darts = Discrete(data=uniform_probs,
                              variables='region',
                              states=list(regions))
예제 #6
0
    def test_mode_nd_multi_value(self):
        """mode() returns one row per tied state of a 2-variable distribution."""
        # ('c', 'd') and ('e', 'f') tie for the largest count.
        pair_counts = Series({('a', 'b'): 1, ('c', 'd'): 3, ('e', 'f'): 3})
        expected_modes = DataFrame({'ace': ['c', 'e'], 'bdf': ['d', 'f']})
        distribution = Discrete.from_counts(pair_counts,
                                            variables=['ace', 'bdf'])
        self.assertTrue(expected_modes.equals(distribution.mode()))
예제 #7
0
    def test_max_numeric(self):
        """max() is the largest state, skipping a state with probability 0."""
        cases = [
            # (probabilities by state, expected max)
            ({0: 0.7, 1000: 0.2, 2000: 0.1}, 2000),
            ({0: 0.7, 1000: 0.3, 2000: 0}, 1000),
        ]
        for probs, expected_max in cases:
            distribution = Discrete.from_probs(data=probs, variables='a')
            self.assertAlmostEqual(expected_max, distribution.max())
예제 #8
0
    def test__example_1_7(self):
        """Check P(A=1 | C=0) is about 0.8436 for binary A, B and C."""
        c_table = {
            (0, 0): 0.1,
            (0, 1): 0.99,
            (1, 0): 0.8,
            (1, 1): 0.25,
        }
        c__given__a__and__b = Conditional.binary_from_probs(
            data=c_table,
            joint_variable='C',
            conditional_variables=['A', 'B'])
        p_a = Discrete.binary(0.65, 'A')
        p_b = Discrete.binary(0.77, 'B')
        full_joint = (p_a * p_b) * c__given__a__and__b
        posterior = full_joint.given(C=0).p(A=1)
        self.assertAlmostEqual(0.8436, posterior, 4)
예제 #9
0
    def test__example_1_4(self):
        """Check P(bob=False | alice=True, occupied=True) equals 1."""
        occupied_table = {
            (True, False, False): 1,
            (True, False, True): 1,
            (True, True, False): 1,
            (True, True, True): 0,
        }
        occupied__given__alice__and__bob = Conditional.from_probs(
            occupied_table,
            joint_variables='occupied',
            conditional_variables=['alice', 'bob'])

        # Each (alice, bob) combination gets probability 0.25.
        presence_table = {
            (False, False): 0.25,
            (False, True): 0.25,
            (True, False): 0.25,
            (True, True): 0.25,
        }
        alice__and__bob = Discrete.from_probs(presence_table,
                                              variables=['alice', 'bob'])

        full_joint = occupied__given__alice__and__bob * alice__and__bob
        evidence = full_joint.given(alice=True, occupied=True)
        self.assertEqual(1, evidence.p(bob=False))
예제 #10
0
    def test_from_counts__1_var__vars_as_arg(self):
        """from_counts() takes the variable name from ``variables``."""
        letter_counts = Series({'a': 1, 'b': 2, 'c': 3})
        distribution = Discrete.from_counts(letter_counts, variables='abc')
        self.assertEqual(['abc'], distribution.variables)
        self.assertEqual({'abc': ['a', 'b', 'c']}, distribution.states)
        # Each probability is the state's count over the total of 6.
        for count, state in ((1, 'a'), (2, 'b'), (3, 'c')):
            self.assertEqual(count / 6, distribution.p(abc=state))
예제 #11
0
    def test_max_categorical(self):
        """max() raises TypeError when the states are strings."""
        letter_probs = {'a': 0.7, 'b': 0.2, 'c': 0.1}
        distribution = Discrete.from_probs(data=letter_probs, variables='x')
        with self.assertRaises(TypeError):
            distribution.max()
예제 #12
0
    def test_mode_1d_numeric(self):
        """mode() returns the single most probable numeric state."""
        value_probs = {0: 0.7, 1000: 0.2, 2000: 0.1}
        distribution = Discrete.from_probs(data=value_probs, variables='a')
        most_likely = distribution.mode()
        self.assertEqual(0, most_likely)
예제 #13
0
    def setUp(self) -> None:
        """Create the cookie-bowl and m&m fixtures shared by the tests.

        Cookie fixtures: four expected joint probabilities, the observed
        ``self.cookies`` distribution with the vanilla probabilities read from
        it, and a 50/50 prior over the two bowls.
        m&m fixtures: the 1994 and 1996 colour mixes and a 50/50 prior over
        the bag year.
        """
        # --- cookies: expected joint probabilities -------------------------
        self.bowl_1_and_chocolate = 0.125
        self.bowl_1_and_vanilla = 0.375
        self.bowl_2_and_chocolate = 0.25
        self.bowl_2_and_vanilla = 0.25

        # --- cookies: distribution built from observations -----------------
        observations = TestChapter01.make_cookies_observations()
        self.cookies = Discrete.from_observations(observations)
        self.vanilla = self.cookies.p(flavor='vanilla')
        given_bowl_1 = self.cookies.given(bowl='bowl 1')
        self.vanilla__bowl_1 = given_bowl_1.p(flavor='vanilla')
        given_bowl_2 = self.cookies.given(bowl='bowl 2')
        self.vanilla__bowl_2 = given_bowl_2.p(flavor='vanilla')

        # --- cookies: prior over bowls -------------------------------------
        bowl_prior = {'bowl 1': 0.5, 'bowl 2': 0.5}
        self.bowl = Discrete.from_probs(bowl_prior, variables=['bowl'])
        self.bowl_1 = self.bowl.p(bowl='bowl 1')
        self.bowl_2 = self.bowl.p(bowl='bowl 2')

        # --- m & m's --------------------------------------------------------
        colors_1994 = {
            'brown': 0.3,
            'yellow': 0.2,
            'red': 0.2,
            'green': 0.1,
            'orange': 0.1,
            'tan': 0.1,
        }
        colors_1996 = {
            'blue': 0.24,
            'green': 0.2,
            'orange': 0.16,
            'yellow': 0.14,
            'red': 0.13,
            'brown': 0.13,
        }
        self.mix_1994 = Discrete.from_probs(colors_1994, variables='color')
        self.mix_1996 = Discrete.from_probs(colors_1996, variables='color')
        self.bag = Discrete.from_probs({1994: 0.5, 1996: 0.5}, variables='bag')
예제 #14
0
    def test_from_counts__1_var__vars_on_index(self):
        """from_counts() takes the variable name from the Series index name."""
        letter_counts = Series({'a': 1, 'b': 2, 'c': 3})
        letter_counts.index.name = 'abc'
        distribution = Discrete.from_counts(letter_counts)
        self.assertEqual(['abc'], distribution.variables)
        self.assertEqual({'abc': ['a', 'b', 'c']}, distribution.states)
        # Each probability is the state's count over the total of 6.
        for count, state in ((1, 'a'), (2, 'b'), (3, 'c')):
            self.assertEqual(count / 6, distribution.p(abc=state))
예제 #15
0
    def test_mean_numeric(self):
        """mean() equals the probability-weighted sum of the numeric states."""
        distribution = Discrete.from_probs(
            data={0: 0.7, 1000: 0.2, 2000: 0.1},
            variables='a')
        expected_mean = 0 * 0.7 + 1000 * 0.2 + 2000 * 0.1
        self.assertAlmostEqual(expected_mean, distribution.mean())
예제 #16
0
    def test_from_probs__with_dict(self):
        """from_probs() returns a Discrete when given a plain dict.

        Covers ``variables`` passed as a one-element list and as a string.
        """
        bowl_probs = {'bowl 1': 0.5, 'bowl 2': 0.5}
        bowl = Discrete.from_probs(bowl_probs, variables=['bowl'])
        self.assertIsInstance(bowl, Discrete)

        color_probs = {
            'brown': 0.3,
            'yellow': 0.2,
            'red': 0.2,
            'green': 0.1,
            'orange': 0.1,
            'tan': 0.1,
        }
        mix_1994 = Discrete.from_probs(color_probs, variables='color')
        self.assertIsInstance(mix_1994, Discrete)
예제 #17
0
    def test_from_observations__1_var(self):
        """from_observations() on a one-column frame yields row frequencies."""
        frame = DataFrame({'ace': ['a', 'c', 'c', 'e', 'e', 'e']})
        distribution = Discrete.from_observations(frame)
        self.assertEqual(['ace'], distribution.variables)
        self.assertEqual({'ace': ['a', 'c', 'e']}, distribution.states)
        # 'a', 'c' and 'e' occur 1, 2 and 3 times out of 6 rows.
        for occurrences, state in ((1, 'a'), (2, 'c'), (3, 'e')):
            self.assertEqual(occurrences / 6, distribution.p(ace=state))
예제 #18
0
    def test_from_counts__2_vars__states_as_arg(self):
        """from_counts() keeps explicitly supplied states, unobserved included.

        The supplied ``states`` contain 'g' and 'h', which never appear in the
        counts. The resulting ``states`` must equal the supplied dict.
        """
        pair_counts = Series({('a', 'b'): 1, ('c', 'd'): 2, ('e', 'f'): 3})
        pair_counts.index.names = ['ace', 'bdf']
        declared_states = {
            'ace': ['a', 'c', 'e', 'g'],
            'bdf': ['b', 'd', 'f', 'h'],
        }
        distribution = Discrete.from_counts(pair_counts,
                                            states=declared_states)
        self.assertEqual(['ace', 'bdf'], distribution.variables)
        self.assertEqual(declared_states, distribution.states)
        for count, ace, bdf in ((1, 'a', 'b'), (2, 'c', 'd'), (3, 'e', 'f')):
            self.assertEqual(count / 6, distribution.p(ace=ace, bdf=bdf))
예제 #19
0
    def test_from_observations__1_var__replace_vars(self):
        """from_observations() renames the column via ``variables``."""
        frame = DataFrame({'ace': ['a', 'c', 'c', 'e', 'e', 'e']})
        distribution = Discrete.from_observations(frame, variables='ACE')
        # The result is keyed by the replacement name, not the column name.
        self.assertEqual(['ACE'], distribution.variables)
        self.assertEqual({'ACE': ['a', 'c', 'e']}, distribution.states)
        for occurrences, state in ((1, 'a'), (2, 'c'), (3, 'e')):
            self.assertEqual(occurrences / 6, distribution.p(ACE=state))
예제 #20
0
    def test__1_3_1(self):
        """Check P(s_a, s_b | t=9) for two dice whose total is ``t``.

        Exactly four of the 36 face combinations total 9, so each of them is
        expected to have posterior probability 0.25 and every other
        combination 0.
        """
        faces = range(1, 7)
        t = Discrete.from_observations(data=DataFrame({
            't': [s_a + s_b for s_a, s_b in product(faces, faces)]
        }))
        s_a__s_b = Discrete.from_probs(data={
            (a, b): 1 / 36
            for a, b in product(faces, faces)
        },
                                       variables=['s_a', 's_b'])
        t_9__given__s_a__s_b = Conditional.from_probs(
            data={(9, a, b): int(a + b == 9)
                  for a, b in product(faces, faces)},
            joint_variables=['t'],
            conditional_variables=['s_a', 's_b'])
        t_9__s_a__s_b = t_9__given__s_a__s_b * s_a__s_b
        t_9 = t_9__s_a__s_b / t.p(t=9)
        # Check every face 1..6 on both dice. The previous range(1, 6) loop
        # never reached face 6, so (3, 6) and (6, 3) went unverified.
        for s_a, s_b in product(faces, faces):
            self.assertEqual(0.25 if s_a + s_b == 9 else 0,
                             t_9.p(s_a=s_a, s_b=s_b))
예제 #21
0
    def test__1_1_2(self):
        """Check the country/language joint built from counts and a table.

        Steps:
        1. Build P(country) from population counts and check it to 3 places.
        2. Wrap a language-by-country DataFrame in a ``Conditional`` and check
           which variables it reports as joint and as conditional.
        3. Multiply the two and check every joint probability to 3 places.
        """
        # 1. P(country) from raw population counts.
        population_counts = Series({
            'England': 60_776_238,
            'Scotland': 5_116_900,
            'Wales': 2_980_700
        })
        population_counts.index.name = 'country'
        populations = Discrete.from_counts(data=population_counts)
        for expected, country in (
            (0.882, 'England'),
            (0.074, 'Scotland'),
            (0.043, 'Wales'),
        ):
            self.assertAlmostEqual(expected, populations.p(country=country),
                                   3)

        # 2. Language probabilities: one column per country, one row per
        #    language.
        by_country = {
            'England': {'English': 0.95, 'Scottish': 0.04, 'Welsh': 0.01},
            'Scotland': {'English': 0.7, 'Scottish': 0.3, 'Welsh': 0.0},
            'Wales': {'English': 0.6, 'Scottish': 0.0, 'Welsh': 0.4},
        }
        language_probs = DataFrame.from_dict(by_country, orient='columns')
        language_probs.index.name = 'language'
        language_probs.columns.name = 'country'
        language__given__country = Conditional(data=language_probs)
        self.assertIsInstance(language__given__country.data, DataFrame)
        self.assertListEqual(['language'],
                             language__given__country.joint_variables)
        self.assertListEqual(['country'],
                             language__given__country.conditional_variables)

        # 3. Joint over country and language.
        country__language = language__given__country * populations
        expected_joint = [
            (0.838, 'England', 'English'),
            (0.035, 'England', 'Scottish'),
            (0.009, 'England', 'Welsh'),
            (0.052, 'Scotland', 'English'),
            (0.022, 'Scotland', 'Scottish'),
            (0.0, 'Scotland', 'Welsh'),
            (0.026, 'Wales', 'English'),
            (0.0, 'Wales', 'Scottish'),
            (0.017, 'Wales', 'Welsh'),
        ]
        for prob, country, language in expected_joint:
            actual = country__language.p(country=country, language=language)
            self.assertAlmostEqual(prob, actual, 3)
예제 #22
0
    def test_from_counts__2_vars__vars_as_arg(self):
        """from_counts() names both levels of a tuple-keyed Series."""
        pair_counts = Series({('a', 'b'): 1, ('c', 'd'): 2, ('e', 'f'): 3})
        distribution = Discrete.from_counts(pair_counts,
                                            variables=['ace', 'bdf'])
        self.assertEqual(['ace', 'bdf'], distribution.variables)
        expected_states = {'ace': ['a', 'c', 'e'], 'bdf': ['b', 'd', 'f']}
        self.assertEqual(expected_states, distribution.states)
        for count, ace, bdf in ((1, 'a', 'b'), (2, 'c', 'd'), (3, 'e', 'f')):
            self.assertEqual(count / 6, distribution.p(ace=ace, bdf=bdf))
예제 #23
0
    def test_from_observations__2_vars__extra_states(self):
        """from_observations() keeps supplied states that were never observed.

        The supplied ``states`` contain 'g' and 'h', which appear in no row.
        The resulting ``states`` must equal the supplied dict.
        """
        frame = DataFrame({
            'ace': ['a', 'c', 'c', 'e', 'e', 'e'],
            'bdf': ['b', 'd', 'd', 'f', 'f', 'f'],
        })
        declared_states = {
            'ace': ['a', 'c', 'e', 'g'],
            'bdf': ['b', 'd', 'f', 'h'],
        }
        distribution = Discrete.from_observations(frame,
                                                  states=declared_states)
        self.assertEqual(['ace', 'bdf'], distribution.variables)
        self.assertEqual(declared_states, distribution.states)
        for rows, ace, bdf in ((1, 'a', 'b'), (2, 'c', 'd'), (3, 'e', 'f')):
            self.assertEqual(rows / 6, distribution.p(ace=ace, bdf=bdf))
예제 #24
0
    def test_given_all_variables(self):
        """given() on every conditional variable matches a binary Discrete.

        Conditioning the XOR table on A=1, B=1 must produce the same ``data``
        as ``Discrete.binary(0, 'A_xor_B')``.
        """
        xor_table = {
            (0, 0): 0,
            (0, 1): 1,
            (1, 0): 1,
            (1, 1): 0,
        }
        expected_data = Discrete.binary(0, 'A_xor_B').data
        xor = Conditional.binary_from_probs(
            data=xor_table,
            joint_variable='A_xor_B',
            conditional_variables=['A', 'B'])
        conditioned = xor.given(A=1, B=1)
        self.assertTrue(expected_data.equals(conditioned.data))
예제 #25
0
    def test_from_observations__2_vars__replace_vars(self):
        """from_observations() renames both columns via ``variables``."""
        frame = DataFrame({
            'ace': ['a', 'c', 'c', 'e', 'e', 'e'],
            'bdf': ['b', 'd', 'd', 'f', 'f', 'f'],
        })
        distribution = Discrete.from_observations(frame,
                                                  variables=['ACE', 'BDF'])
        # The result is keyed by the replacement names, not the column names.
        self.assertEqual(['ACE', 'BDF'], distribution.variables)
        expected_states = {'ACE': ['a', 'c', 'e'], 'BDF': ['b', 'd', 'f']}
        self.assertEqual(expected_states, distribution.states)
        for rows, ace, bdf in ((1, 'a', 'b'), (2, 'c', 'd'), (3, 'e', 'f')):
            self.assertEqual(rows / 6, distribution.p(ACE=ace, BDF=bdf))
예제 #26
0
    def test_mean_categorical(self):
        """mean() raises TypeError when the states are strings."""
        letter_counts = Series({'a': 1, 'c': 2, 'e': 3})
        distribution = Discrete.from_counts(letter_counts, variables='x')
        with self.assertRaises(TypeError):
            distribution.mean()
예제 #27
0
    def test_mode_1d_categorical(self):
        """mode() returns the single most frequent string state."""
        letter_counts = Series({'a': 1, 'c': 2, 'e': 3})
        distribution = Discrete.from_counts(letter_counts, variables='x')
        most_frequent = distribution.mode()
        self.assertEqual('e', most_frequent)
예제 #28
0
    def test_mode_1d_multi_value(self):
        """mode() returns a list of all tied states of a 1-variable distribution."""
        # 'a' and 'b' tie for the largest count.
        letter_counts = Series({'a': 2, 'b': 2, 'c': 1})
        distribution = Discrete.from_counts(letter_counts, variables='x')
        tied_modes = distribution.mode()
        self.assertListEqual(['a', 'b'], tied_modes)