def plot_wikipedia_cdfs():
    """
    Reproduce the Beta CDF chart from Wikipedia.

    https://en.wikipedia.org/wiki/Beta_distribution#/media/File:Beta_distribution_cdf.svg
    """
    ax = new_axes(width=10, height=10)
    # (alpha, beta, line color) for each curve, in plot order
    curves = [(0.5, 0.5, 'red'), (5, 1, 'blue'), (1, 3, 'green'),
              (2, 2, 'purple'), (2, 5, 'orange')]
    for alpha, beta, color in curves:
        Beta(alpha, beta).cdf().plot(x=x, color=color, ax=ax)
    ax.set_title('Cumulative distribution function')
    ax.legend(loc='upper left')
    plt.show()
def plot_wikipedia_pdfs():
    """
    Reproduce the Beta PDF chart from Wikipedia.

    https://en.wikipedia.org/wiki/Beta_distribution#/media/File:Beta_distribution_pdf.svg
    """
    ax = new_axes(width=10, height=10)
    # (alpha, beta, line color) for each curve, in plot order
    curves = [(0.5, 0.5, 'red'), (5, 1, 'blue'), (1, 3, 'green'),
              (2, 2, 'purple'), (2, 5, 'orange')]
    for alpha, beta, color in curves:
        Beta(alpha, beta).pdf().plot(x=x, color=color, ax=ax)
    ax.set_ylim(0, 2.5)
    ax.set_title('Probability density function')
    ax.legend(loc='upper center')
    plt.show()
def test_infer_posteriors(self):
    """Check posteriors inferred from the geometric data for each (a, b, prob_var)."""
    # (a, b, prob_var) -> expected posterior Beta; prob_val is always 1
    expected_betas = {
        (1, 1, 'c'): Beta(1 + 1, 1 + 1),
        (2, 1, 'c'): Beta(1 + 1, 1 + 2),
        (1, 2, 'c'): Beta(1 + 1, 1 + 3),
        (2, 2, 'c'): Beta(1 + 1, 1 + 4),
        (1, 1, 'd'): Beta(1 + 2, 1 + 0),
        (2, 1, 'd'): Beta(1 + 2, 1 + 1),
        (1, 2, 'd'): Beta(1 + 2, 1 + 2),
        (2, 2, 'd'): Beta(1 + 2, 1 + 3),
    }
    actual = BetaGeometricConjugate.infer_posteriors(
        data=self.geometric_data,
        prob_vars=['c', 'd'],
        cond_vars=['a', 'b'])
    for (a, b, prob_var), expected_beta in expected_betas.items():
        mask = ((actual['a'] == a) &
                (actual['b'] == b) &
                (actual['prob_var'] == prob_var) &
                (actual['prob_val'] == 1))
        actual_beta = actual.loc[mask, 'Beta'].iloc[0]
        self.assertTrue(expected_beta == actual_beta)
def test_fit(self):
    """Round-trip: sample from a known Beta, refit, and compare parameters."""
    param_pairs = [(0.5, 0.5), (5, 1), (1, 3), (2, 2), (2, 5)]
    for alpha, beta in param_pairs:
        original = Beta(alpha, beta)
        fitted = Beta.fit(original.rvs(100_000))
        # 1 decimal place of agreement is enough given sampling noise
        self.assertAlmostEqual(fitted.alpha, original.alpha, 1)
        self.assertAlmostEqual(fitted.beta, original.beta, 1)
def plot_ml_app():
    """
    Reproduce Figure 3.6 from Machine Learning: A Probabilistic Perspective.
    """
    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
    # (prior alpha, prior beta, likelihood alpha, likelihood beta, m successes)
    panels = [(2, 2, 3, 17, 3), (5, 2, 11, 13, 11)]
    for axis, (alpha, beta, lik_alpha, lik_beta, m) in zip(axes, panels):
        bb = BetaBinomial(n=20, alpha=alpha, beta=beta)
        bb.prior().plot(x=x, color='red', ax=axis)
        # a Beta stands in for the likelihood so it plots on the same scale
        Beta(alpha=lik_alpha, beta=lik_beta).plot(x=x, color='black', ax=axis)
        bb.posterior(m=m).plot(x=x, color='blue', ax=axis)
        axis.legend()
    plt.show()
def from_proportions(data: DataFrame):
    """
    Fit a Beta distribution to each column of proportions in `data`.

    :param data: DataFrame of proportions; NaNs are dropped per column.
    :return: BetaSeries with one fitted Beta per column of `data`.
    """
    fits = {name: Beta.fit(data[name].dropna()) for name in data.columns}
    return BetaSeries(Series(fits))
def setUp(self) -> None:
    """Build Beta / Dirichlet fixtures and their products and containers."""
    # individual Beta fixtures
    self.b1 = Beta(700, 300)
    self.b2 = Beta(600, 400)
    self.b3 = Beta(500, 500)
    # Dirichlet fixtures: one from a list, one from a named mapping
    self.d1 = Dirichlet([500, 300, 200])
    self.d2 = Dirichlet(dict(x=100, y=200, z=300))
    # products of Betas, including the product with a complement
    self.b1__mul__b2 = self.b1 * self.b2
    self.b3__mul__b1__mul__b2 = self.b3 * self.b1__mul__b2
    self.b1__mul__comp__b1 = self.b1 * (1 - self.b1)
    # pandas containers of Betas
    self.b_series = Series(dict(b1=self.b1, b2=self.b2, b3=self.b3))
    self.b_frame = DataFrame(
        data=[[self.b1, self.b2],
              [self.b2, self.b3]],
        index=['r1', 'r2'],
        columns=['c1', 'c2'])
    self.float_series = Series({'$100': 0.8, '$200': 0.6})
def conditional_beta_table(
        self, condition: 'DataProbabilityTableMixin') -> BetaFrame:
    """
    Return the conditional probability of each category given
    different values of condition.

    :param condition: Variable to condition on.
    :return: BetaFrame with one Beta per (condition value, category) cell.
    """
    counts = self.joint_count_table(condition)
    row_sums = counts.sum(axis=1)
    # Each count becomes Beta(successes, failures), where failures are the
    # remaining observations in the same row. Iterate rows and their sums in
    # lockstep instead of indexing by range(len(...)); bind the row total as
    # a default argument so the lambda doesn't close over the loop variable.
    beta_rows = [
        row.map(lambda count, total=total: Beta(count, total - count))
        for (_, row), total in zip(counts.iterrows(), row_sums)
    ]
    return BetaFrame(DataFrame(beta_rows))
def test_distribution_table__significance(self):
    """Check significance flags for a 5-point Likert distribution table."""
    counts = array([2, 4, 6, 0, 3])
    total = counts.sum()
    complements = total - counts
    k = len(counts)
    # mean count of the other categories, and its complement
    rest_means = array([(counts.sum() - counts[i]) / (k - 1)
                        for i in range(k)])
    rest_complements = total - rest_means
    labels = ['1 - strongly disagree', '2 - disagree',
              '3 - neither agree nor disagree', '4 - agree',
              '5 - strongly agree']
    rows = []
    for i, label in enumerate(labels):
        significance = (
            Beta(1 + counts[i], 1 + complements[i]) >
            Beta(1 + rest_means[i], 1 + rest_complements[i])
        )
        rows.append((label, counts[i], significance))
    expected = DataFrame(data=rows,
                         columns=['Value', 'Count', 'Significance'])
    actual = self.question.distribution_table(significance=True)
    self.assertTrue(expected.equals(actual))
def test_infer_posteriors(self):
    """Check posteriors inferred from the binomial data for each (a, b, prob_var)."""
    # betas[k] == Beta(1 + k, 1 + 3 - k), i.e. b__0_3 .. b__3_0
    betas = [Beta(1 + k, 1 + 3 - k) for k in range(4)]
    # (a, b, prob_var) -> expected posterior; prob_val is always 1
    cases = [
        (1, 1, 'c', betas[0]), (2, 1, 'c', betas[1]),
        (1, 2, 'c', betas[2]), (2, 2, 'c', betas[3]),
        (1, 1, 'd', betas[3]), (2, 1, 'd', betas[2]),
        (1, 2, 'd', betas[1]), (2, 2, 'd', betas[0]),
    ]
    actual = BetaBinomialConjugate.infer_posteriors(
        data=self.binomial_data,
        prob_vars=['c', 'd'],
        cond_vars=['a', 'b'])
    for a, b, prob_var, expected_beta in cases:
        mask = ((actual['a'] == a) &
                (actual['b'] == b) &
                (actual['prob_var'] == prob_var) &
                (actual['prob_val'] == 1))
        self.assertTrue(expected_beta == actual.loc[mask, 'Beta'].iloc[0])
def test_distribution_table__significance(self):
    """Check significance flags for a categorical (fruit) distribution table."""
    counts = array([3, 2, 1, 0])
    total = counts.sum()
    complements = total - counts
    k = len(counts)
    # mean count of the other categories, and its complement
    rest_means = array([(counts.sum() - counts[i]) / (k - 1)
                        for i in range(k)])
    rest_complements = total - rest_means
    labels = ['apples', 'bananas', 'cherries', 'dates']
    rows = []
    for i, label in enumerate(labels):
        significance = (
            Beta(1 + counts[i], 1 + complements[i]) >
            Beta(1 + rest_means[i], 1 + rest_complements[i])
        )
        rows.append((label, counts[i], significance))
    expected = DataFrame(data=rows,
                         columns=['Value', 'Count', 'Significance'])
    actual = self.question.distribution_table(significance=True)
    self.assertTrue(expected.equals(actual))
def from_bool_frame(data: DataFrame,
                    prior_alpha: float = 0,
                    prior_beta: float = 0,
                    name: str = ''):
    """
    Create a new BetaSeries using the counts of True and False
    or 1 and 0 in a DataFrame.

    :param data: Data with True / False counts.
    :param prior_alpha: Value for alpha assuming these represent
                        posterior distributions.
    :param prior_beta: Value for beta assuming these represent
                       posterior distributions.
    :param name: Name for the Series.
    """
    # One Beta per column: alpha counts the 1s / Trues, beta the 0s / Falses,
    # each offset by the corresponding prior pseudo-count.
    betas = {
        col: Beta(alpha=prior_alpha + (data[col] == 1).sum(),
                  beta=prior_beta + (data[col] == 0).sum())
        for col in data.columns
    }
    return BetaSeries(Series(data=betas, name=name))
def setUp(self) -> None:
    """Create prior and likelihood fixtures, both as floats and as Betas."""
    # scalar prior and its Beta equivalent (3 successes, 7 failures)
    self.prior_float = 0.3
    self.prior_beta = Beta(1 + 3, 1 + 7)
    # per-price priors
    self.prior_float_map = Series({'$100': 0.3, '$200': 0.2})
    prior_betas = {'$100': Beta(1 + 3, 1 + 7),
                   '$200': Beta(1 + 2, 1 + 8)}
    self.prior_beta_map = Series(prior_betas)
    # scalar likelihood and its Beta equivalent (8 successes, 2 failures)
    self.likelihood_float = 0.8
    self.likelihood_beta = Beta(1 + 8, 1 + 2)
    # per-price likelihoods
    self.likelihood_float_map = Series({'$100': 0.8, '$200': 0.6})
    likelihood_betas = {'$100': Beta(1 + 8, 1 + 2),
                        '$200': Beta(1 + 6, 1 + 4)}
    self.likelihood_beta_map = Series(likelihood_betas)
def test_distribution_table__significance(self):
    """Check significance of one respondent's counts against the rest."""
    n_one = 3
    n_rest = 6
    one_counts = array([3, 2, 1])
    one_fails = n_one - one_counts
    rest_counts = array([sum(one_counts) - one_counts[i]
                         for i in range(len(one_counts))])
    rest_fails = n_rest - rest_counts
    fruit_counts = [('apples', 3), ('bananas', 2), ('cherries', 1)]
    rows = [
        (fruit, count,
         Beta(1 + one_counts[i], 1 + one_fails[i]) >
         Beta(1 + rest_counts[i], 1 + rest_fails[i]))
        for i, (fruit, count) in enumerate(fruit_counts)
    ]
    expected = DataFrame(data=rows,
                         columns=['Value', 'Count', 'Significance'])
    actual = self.question.distribution_table(significance=True)
    self.assertTrue(expected.equals(actual))
def test_get_item(self):
    """Indexing the series should yield Beta(alpha=v, beta=1 - v) per key."""
    for key, alpha in self.d_series.alpha.items():
        self.assertTrue(
            Beta(alpha=alpha, beta=1 - alpha) == self.d_series[key])
def test_infer_posteriors_with_stats(self):
    """Check posteriors when extra stats columns are requested."""
    # betas[k] == Beta(1 + k, 1 + 3 - k), i.e. b__0_3 .. b__3_0
    betas = [Beta(1 + k, 1 + 3 - k) for k in range(4)]

    def make_row(a, b, prob_var, beta):
        # prob_val is always 1; stats are derived from the same Beta
        return (a, b, prob_var, 1, beta, beta.mean(), beta.interval(.95))

    expected = DataFrame(
        data=[make_row(1, 1, 'c', betas[0]),
              make_row(2, 1, 'c', betas[1]),
              make_row(1, 2, 'c', betas[2]),
              make_row(2, 2, 'c', betas[3]),
              make_row(1, 1, 'd', betas[3]),
              make_row(2, 1, 'd', betas[2]),
              make_row(1, 2, 'd', betas[1]),
              make_row(2, 2, 'd', betas[0])],
        columns=['a', 'b', 'prob_var', 'prob_val', 'Beta',
                 'mean', 'interval__0.95'])
    actual = BetaBinomialConjugate.infer_posteriors(
        data=self.binomial_data,
        prob_vars=['c', 'd'],
        cond_vars=['a', 'b'],
        stats=['mean', {'interval': 0.95}])
    # NOTE(review): only the 'Beta' column is compared below; the 'mean' and
    # 'interval__0.95' columns of `expected` are never checked against
    # `actual` — confirm whether that is intentional.
    for _, row in expected.iterrows():
        mask = ((actual['a'] == row['a']) &
                (actual['b'] == row['b']) &
                (actual['prob_var'] == row['prob_var']) &
                (actual['prob_val'] == row['prob_val']))
        self.assertTrue(row['Beta'] == actual.loc[mask, 'Beta'].iloc[0])
def test_infer_posterior(self):
    """Posterior from 4 successes and 6 failures on a Beta(1, 1) prior."""
    posterior = BetaBinomialConjugate.infer_posterior(self.series)
    self.assertEqual(Beta(alpha=1 + 4, beta=1 + 6), posterior)
def test_infer_posterior(self):
    """Posterior from 2 successes in 10 trials on a Beta(1, 1) prior."""
    posterior = BetaGeometricConjugate.infer_posterior(self.series)
    self.assertEqual(Beta(alpha=1 + 2, beta=1 + 10 - 2), posterior)