Beispiel #1
0
def plot_wikipedia_cdfs():
    """
    Plot the CDFs of five Beta distributions on one set of axes and show it.

    Reference figure:
    https://en.wikipedia.org/wiki/Beta_distribution#/media/File:Beta_distribution_cdf.svg

    The plotting domain `x` is read from the enclosing module scope.
    """
    # (first Beta parameter, second Beta parameter, line color), drawn in order
    curves = (
        (0.5, 0.5, 'red'),
        (5, 1, 'blue'),
        (1, 3, 'green'),
        (2, 2, 'purple'),
        (2, 5, 'orange'),
    )
    ax = new_axes(width=10, height=10)
    for a, b, color in curves:
        Beta(a, b).cdf().plot(x=x, color=color, ax=ax)
    ax.set_title('Cumulative distribution function')
    ax.legend(loc='upper left')
    plt.show()
Beispiel #2
0
def plot_wikipedia_pdfs():
    """
    Plot the PDFs of five Beta distributions on one set of axes and show it.

    Reference figure:
    https://en.wikipedia.org/wiki/Beta_distribution#/media/File:Beta_distribution_pdf.svg

    The plotting domain `x` is read from the enclosing module scope.
    """
    # (first Beta parameter, second Beta parameter, line color), drawn in order
    curves = (
        (0.5, 0.5, 'red'),
        (5, 1, 'blue'),
        (1, 3, 'green'),
        (2, 2, 'purple'),
        (2, 5, 'orange'),
    )
    ax = new_axes(width=10, height=10)
    for a, b, color in curves:
        Beta(a, b).pdf().plot(x=x, color=color, ax=ax)
    # Fix the vertical range to [0, 2.5] regardless of the plotted values.
    ax.set_ylim(0, 2.5)
    ax.set_title('Probability density function')
    ax.legend(loc='upper center')
    plt.show()
    def test_infer_posteriors(self):
        """
        BetaGeometricConjugate.infer_posteriors should return the expected
        Beta for every (a, b, prob_var, prob_val) combination.
        """
        # (a, b) combinations, in the order the expected Betas are listed
        cells = [(1, 1), (2, 1), (1, 2), (2, 2)]
        c_betas = [Beta(1 + 1, 1 + k) for k in (1, 2, 3, 4)]
        d_betas = [Beta(1 + 2, 1 + k) for k in (0, 1, 2, 3)]

        rows = [(a, b, 'c', 1, beta) for (a, b), beta in zip(cells, c_betas)]
        rows += [(a, b, 'd', 1, beta) for (a, b), beta in zip(cells, d_betas)]
        expected = DataFrame(
            data=rows,
            columns=['a', 'b', 'prob_var', 'prob_val', 'Beta'])

        actual = BetaGeometricConjugate.infer_posteriors(
            data=self.geometric_data,
            prob_vars=['c', 'd'],
            cond_vars=['a', 'b'])

        for _, want in expected.iterrows():
            # Select the actual row whose key columns all match.
            matches = actual['a'] == want['a']
            for column in ('b', 'prob_var', 'prob_val'):
                matches = matches & (actual[column] == want[column])
            found = actual.loc[matches, 'Beta'].iloc[0]
            self.assertTrue(want['Beta'] == found)
Beispiel #4
0
    def test_fit(self):
        """
        Fitting a Beta to 100,000 samples drawn from a known Beta should
        recover alpha and beta to one decimal place.
        """
        parameter_pairs = ((0.5, 0.5), (5, 1), (1, 3), (2, 2), (2, 5))
        for alpha, beta in parameter_pairs:
            source = Beta(alpha, beta)
            sample = source.rvs(100_000)
            fitted = Beta.fit(sample)
            self.assertAlmostEqual(fitted.alpha, source.alpha, places=1)
            self.assertAlmostEqual(fitted.beta, source.beta, places=1)
Beispiel #5
0
def plot_ml_app():
    """
    Plot prior, likelihood and posterior for two Beta-Binomial models,
    one model per panel of a 1x2 figure, then show the figure.

    Reference: Machine Learning: A Probabilistic Perspective. Figure 3.6

    The plotting domain `x` is read from the enclosing module scope.
    """
    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
    # Per panel: (prior alpha, prior beta, m, likelihood alpha, likelihood beta)
    # NOTE(review): both panels use n=20, but the second panel's likelihood is
    # Beta(11, 13) and 11 + 13 = 24 -- confirm n against the book figure.
    panels = (
        (2, 2, 3, 3, 17),
        (5, 2, 11, 11, 13),
    )
    for panel_ax, (prior_a, prior_b, m, lik_a, lik_b) in zip(axes, panels):
        model = BetaBinomial(n=20, alpha=prior_a, beta=prior_b)
        model.prior().plot(x=x, color='red', ax=panel_ax)
        # using a Beta to plot likelihood on the same scale
        Beta(alpha=lik_a, beta=lik_b).plot(x=x, color='black', ax=panel_ax)
        model.posterior(m=m).plot(x=x, color='blue', ax=panel_ax)
        panel_ax.legend()
    plt.show()
Beispiel #6
0
 def from_proportions(data: DataFrame):
     """
     Fit one Beta distribution to each column of a DataFrame of proportions.

     :param data: DataFrame of proportions. Missing values in a column are
                  dropped before that column is fitted.
     :return: A BetaSeries wrapping a Series with one fitted Beta per column
              of data, keyed by column name.
     """
     fitted = {}
     for column in data.columns:
         observed = data[column].dropna()
         fitted[column] = Beta.fit(observed)
     return BetaSeries(Series(fitted))
Beispiel #7
0
    def setUp(self) -> None:
        """
        Build the Beta, Dirichlet, Series and DataFrame fixtures used by the
        tests in this case.
        """
        b1, b2, b3 = Beta(700, 300), Beta(600, 400), Beta(500, 500)
        self.b1, self.b2, self.b3 = b1, b2, b3

        self.d1 = Dirichlet([500, 300, 200])
        self.d2 = Dirichlet({'x': 100, 'y': 200, 'z': 300})

        # Results of multiplying the base distributions together.
        b1_times_b2 = b1 * b2
        self.b1__mul__b2 = b1_times_b2
        self.b3__mul__b1__mul__b2 = b3 * b1_times_b2
        self.b1__mul__comp__b1 = b1 * (1 - b1)

        # Containers holding distributions.
        self.b_series = Series({'b1': b1, 'b2': b2, 'b3': b3})
        first_column = {'r1': b1, 'r2': b2}
        second_column = {'r1': b2, 'r2': b3}
        self.b_frame = DataFrame({'c1': first_column, 'c2': second_column})

        self.float_series = Series({'$100': 0.8, '$200': 0.6})
 def conditional_beta_table(
         self, condition: 'DataProbabilityTableMixin') -> BetaFrame:
     """
     Return the conditional probability of each category given different
     values of condition.

     Each cell of the joint count table is turned into
     ``Beta(count, row_sum - count)``, where ``row_sum`` is the sum of the
     counts in that cell's row.

     :param condition: Passed through to ``self.joint_count_table``.
     :return: BetaFrame built from one row of Betas per row of counts.
     """
     counts = self.joint_count_table(condition)
     row_totals = counts.sum(axis=1).to_list()

     def betas_for_row(position, total):
         # Map every count in the row to a Beta against that row's total.
         return counts.iloc[position].map(
             lambda count: Beta(count, total - count))

     beta_rows = [
         betas_for_row(position, total)
         for position, total in enumerate(row_totals)
     ]
     return BetaFrame(DataFrame(beta_rows))
    def test_distribution_table__significance(self):
        """
        distribution_table(significance=True) should report, per category,
        its count and the result of comparing the category's Beta with a
        Beta built from the mean count of the other categories.
        """
        labels = [
            '1 - strongly disagree',
            '2 - disagree',
            '3 - neither agree nor disagree',
            '4 - agree',
            '5 - strongly agree',
        ]
        counts = array([2, 4, 6, 0, 3])
        total = counts.sum()
        remainders = total - counts
        # Mean count over the categories other than the one considered.
        other_counts = (total - counts) / (len(counts) - 1)
        other_remainders = total - other_counts

        rows = [
            (label, count,
             Beta(1 + count, 1 + rest) > Beta(1 + other, 1 + other_rest))
            for label, count, rest, other, other_rest in zip(
                labels, counts, remainders, other_counts, other_remainders)
        ]
        expected = DataFrame(data=rows,
                             columns=['Value', 'Count', 'Significance'])
        actual = self.question.distribution_table(significance=True)
        self.assertTrue(expected.equals(actual))
    def test_infer_posteriors(self):
        """
        BetaBinomialConjugate.infer_posteriors should return the expected
        Beta for every (a, b, prob_var, prob_val) combination.
        """
        # (a, b) combinations, in the order the expected Betas are listed
        cells = [(1, 1), (2, 1), (1, 2), (2, 2)]
        # Beta(1 + 0, 1 + 3), Beta(1 + 1, 1 + 2), Beta(1 + 2, 1 + 1),
        # Beta(1 + 3, 1 + 0)
        ascending = [Beta(1 + k, 1 + 3 - k) for k in range(4)]

        # 'c' walks the Betas in ascending order, 'd' in the reverse order.
        rows = [(a, b, 'c', 1, beta)
                for (a, b), beta in zip(cells, ascending)]
        rows += [(a, b, 'd', 1, beta)
                 for (a, b), beta in zip(cells, reversed(ascending))]
        expected = DataFrame(
            data=rows,
            columns=['a', 'b', 'prob_var', 'prob_val', 'Beta'])

        actual = BetaBinomialConjugate.infer_posteriors(
            data=self.binomial_data,
            prob_vars=['c', 'd'],
            cond_vars=['a', 'b'])

        for _, want in expected.iterrows():
            # Select the actual row whose key columns all match.
            matches = actual['a'] == want['a']
            for column in ('b', 'prob_var', 'prob_val'):
                matches = matches & (actual[column] == want[column])
            found = actual.loc[matches, 'Beta'].iloc[0]
            self.assertTrue(want['Beta'] == found)
    def test_distribution_table__significance(self):
        """
        distribution_table(significance=True) should report, per category,
        its count and the result of comparing the category's Beta with a
        Beta built from the mean count of the other categories.
        """
        labels = ['apples', 'bananas', 'cherries', 'dates']
        counts = array([3, 2, 1, 0])
        total = counts.sum()
        remainders = total - counts
        # Mean count over the categories other than the one considered.
        other_counts = (total - counts) / (len(counts) - 1)
        other_remainders = total - other_counts

        rows = [
            (label, count,
             Beta(1 + count, 1 + rest) > Beta(1 + other, 1 + other_rest))
            for label, count, rest, other, other_rest in zip(
                labels, counts, remainders, other_counts, other_remainders)
        ]
        expected = DataFrame(data=rows,
                             columns=['Value', 'Count', 'Significance'])
        actual = self.question.distribution_table(significance=True)
        self.assertTrue(expected.equals(actual))
Beispiel #12
0
    def from_bool_frame(data: DataFrame,
                        prior_alpha: float = 0,
                        prior_beta: float = 0,
                        name: str = ''):
        """
        Create a new BetaSeries with one Beta per column of a DataFrame,
        using the column's counts of True / 1 and False / 0 values.

        :param data: Data with True / False (or 1 / 0) values in each column.
        :param prior_alpha: Amount added to each column's count of values
                            equal to 1 to give alpha.
        :param prior_beta: Amount added to each column's count of values
                           equal to 0 to give beta.
        :param name: Name for the Series.
        :return: BetaSeries keyed by the column names of data.
        """
        per_column = {
            col: Beta(alpha=prior_alpha + (data[col] == 1).sum(),
                      beta=prior_beta + (data[col] == 0).sum())
            for col in data.columns
        }
        return BetaSeries(Series(data=per_column, name=name))
    def setUp(self) -> None:
        """
        Build prior and likelihood fixtures in four forms each: a float, a
        Beta, a Series of floats and a Series of Betas.
        """
        keys = ('$100', '$200')

        # Priors
        self.prior_float = 0.3
        self.prior_beta = Beta(1 + 3, 1 + 7)
        self.prior_float_map = Series(dict(zip(keys, (0.3, 0.2))))
        self.prior_beta_map = Series(
            dict(zip(keys, (Beta(1 + 3, 1 + 7), Beta(1 + 2, 1 + 8)))))

        # Likelihoods
        self.likelihood_float = 0.8
        self.likelihood_float_map = Series(dict(zip(keys, (0.8, 0.6))))
        self.likelihood_beta = Beta(1 + 8, 1 + 2)
        self.likelihood_beta_map = Series(
            dict(zip(keys, (Beta(1 + 8, 1 + 2), Beta(1 + 6, 1 + 4)))))
    def test_distribution_table__significance(self):
        """
        distribution_table(significance=True) should report, per category,
        its count and the result of comparing the category's Beta (out of
        n_one) with a Beta built from the summed counts of the other
        categories (out of n_rest).
        """
        n_one, n_rest = 3, 6
        labelled_counts = (('apples', 3), ('bananas', 2), ('cherries', 1))

        a_one = array([3, 2, 1])
        b_one = n_one - a_one
        # Total count of every category except the one considered.
        a_rest = sum(a_one) - a_one
        b_rest = n_rest - a_rest

        rows = [
            (label, count,
             Beta(1 + a_one[i], 1 + b_one[i]) >
             Beta(1 + a_rest[i], 1 + b_rest[i]))
            for i, (label, count) in enumerate(labelled_counts)
        ]
        expected = DataFrame(data=rows,
                             columns=['Value', 'Count', 'Significance'])
        actual = self.question.distribution_table(significance=True)
        self.assertTrue(expected.equals(actual))
Beispiel #15
0
    def test_get_item(self):
        """
        Indexing d_series by a key should give Beta(alpha, 1 - alpha), where
        alpha is the value stored under that key in d_series.alpha.
        """
        alphas = self.d_series.alpha
        for key, alpha in alphas.items():
            self.assertTrue(
                Beta(alpha=alpha, beta=1 - alpha) == self.d_series[key])
    def test_infer_posteriors_with_stats(self):
        """
        BetaBinomialConjugate.infer_posteriors called with stats should
        return the expected Beta for every (a, b, prob_var, prob_val)
        combination.
        """
        # (a, b) combinations, in the order the expected Betas are listed
        cells = [(1, 1), (2, 1), (1, 2), (2, 2)]
        # Beta(1 + 0, 1 + 3), Beta(1 + 1, 1 + 2), Beta(1 + 2, 1 + 1),
        # Beta(1 + 3, 1 + 0)
        ascending = [Beta(1 + k, 1 + 3 - k) for k in range(4)]

        # 'c' walks the Betas in ascending order, 'd' in the reverse order.
        keyed = [(a, b, 'c', beta)
                 for (a, b), beta in zip(cells, ascending)]
        keyed += [(a, b, 'd', beta)
                  for (a, b), beta in zip(cells, reversed(ascending))]
        expected = DataFrame(
            data=[(a, b, var, 1, beta, beta.mean(), beta.interval(.95))
                  for a, b, var, beta in keyed],
            columns=['a', 'b', 'prob_var', 'prob_val', 'Beta',
                     'mean', 'interval__0.95'])

        actual = BetaBinomialConjugate.infer_posteriors(
            data=self.binomial_data,
            prob_vars=['c', 'd'],
            cond_vars=['a', 'b'],
            stats=['mean', {'interval': 0.95}])

        # NOTE(review): only the 'Beta' column is compared below; the
        # expected 'mean' and 'interval__0.95' values are never asserted --
        # confirm whether that is intentional.
        for _, want in expected.iterrows():
            # Select the actual row whose key columns all match.
            matches = actual['a'] == want['a']
            for column in ('b', 'prob_var', 'prob_val'):
                matches = matches & (actual[column] == want[column])
            found = actual.loc[matches, 'Beta'].iloc[0]
            self.assertTrue(want['Beta'] == found)
    def test_infer_posterior(self):
        """
        BetaBinomialConjugate.infer_posterior on self.series should equal
        Beta(alpha=1 + 4, beta=1 + 6).
        """
        posterior = BetaBinomialConjugate.infer_posterior(self.series)
        self.assertEqual(Beta(alpha=1 + 4, beta=1 + 6), posterior)
    def test_infer_posterior(self):
        """
        BetaGeometricConjugate.infer_posterior on self.series should equal
        Beta(alpha=1 + 2, beta=1 + 10 - 2).
        """
        posterior = BetaGeometricConjugate.infer_posterior(self.series)
        self.assertEqual(Beta(alpha=1 + 2, beta=1 + 10 - 2), posterior)