def test_parameter_estimation(self):
    """Check that bagged linear models recover slope 1 and intercept 0.

    Data are generated as ``y = X + noise`` with ``X`` uniform on [0, 4] and
    standard-normal noise, so the true slope is 1 and the true intercept is 0.
    For each parameter the sample mean must lie within 0.3 of the truth, and
    both the central credible interval and the highest density interval
    (``alpha=0.05``) must bracket the true value.
    """
    features = np.random.uniform(0, 4, 1000)
    targets = features + np.random.normal(0, 1, 1000)

    model = BayesianBootstrapBagging(LinearRegression(), 10000, 1000, low_mem=False)
    model.fit(features.reshape(-1, 1), targets)

    coef_samples = [base.coef_ for base in model.base_models_]
    intercept_samples = [base.intercept_ for base in model.base_models_]

    def assert_brackets(interval, truth):
        # The interval must strictly contain the true parameter value.
        lower, upper = interval
        self.assertLess(lower, truth)
        self.assertGreater(upper, truth)

    # Slope: true value is 1.
    self.assertAlmostEqual(np.mean(coef_samples), 1, delta=0.3)
    assert_brackets(central_credible_interval(coef_samples, alpha=0.05), 1)
    assert_brackets(highest_density_interval(coef_samples, alpha=0.05), 1)

    # Intercept: true value is 0. The mean is checked before each interval.
    self.assertAlmostEqual(np.mean(intercept_samples), 0, delta=0.3)
    assert_brackets(central_credible_interval(intercept_samples, alpha=0.05), 0)
    self.assertAlmostEqual(np.mean(intercept_samples), 0, delta=0.3)
    assert_brackets(highest_density_interval(intercept_samples, alpha=0.05), 0)
def test_hpdi(self):
    """Check that the highest density interval picks the tight cluster [1, 1.1].

    Each case is a small data set (shuffled via ``self._shuffle``) whose
    outliers 0 and 10 must be excluded at the given ``alpha``.
    """
    cases = (
        ([0, 10, 1] + [1.1] * 7, 0.2),
        ([0, 10, 1.1, 1], 0.5),
    )
    for values, alpha in cases:
        lower, upper = highest_density_interval(self._shuffle(values), alpha=alpha)
        self.assertEqual(lower, 1)
        self.assertEqual(upper, 1.1)
def plot_group_hdis(samples, labels, alpha, n_replications):
    """Plot one vertical highest-density-interval bar per group.

    For every ``(group, label)`` pair a posterior is drawn with
    ``mean(group, n_replications)``; its interval is drawn as a vertical line
    at the group's index and the posterior mean is marked with a dot. The
    x-axis ticks are then set to ``labels``.

    Args:
        samples: iterable of per-group observations, each passed to ``mean``.
        labels: sequence of tick labels; pairs are formed with ``zip``, so
            extra entries on either side are ignored when drawing bars.
        alpha: forwarded to ``highest_density_interval`` as ``alpha``. It was
            previously accepted but ignored; ``None`` keeps the callee's
            default so such callers keep working.
        n_replications: number of replications passed to ``mean``.
    """
    # Only forward alpha when one was actually supplied.
    hdi_kwargs = {} if alpha is None else {'alpha': alpha}

    for position, (group, _label) in enumerate(zip(samples, labels)):
        posterior = mean(group, n_replications)
        lower, upper = highest_density_interval(posterior, **hdi_kwargs)
        plt.plot([position, position], [lower, upper])
        plt.plot([position], [np.mean(posterior)], marker='o')

    plt.xticks(range(len(labels)), labels)
def bb_hdi(a_bootstrap, b_bootstrap, alpha=0.05):
    """Summarise two bootstrap posteriors and their difference with HDIs.

    Args:
        a_bootstrap: resampled means for variant A.
        b_bootstrap: resampled means for variant B. It is subtracted
            element-wise from/with ``a_bootstrap`` after conversion to arrays.
        alpha: passed as ``alpha`` to ``bb.highest_density_interval``.

    Returns:
        A dict with these keys:
            'a_ci_low', 'a_ci_hi': interval bounds for A.
            'b_ci_low', 'b_ci_hi': interval bounds for B.
            'diff_mean': mean of the element-wise difference ``B - A``.
            'diff_ci_low', 'diff_ci_hi': interval bounds for the difference.
            'prob_b_>_a': fraction of difference values strictly greater
                than 0.
    """
    hdi = bb.highest_density_interval

    a_low, a_high = hdi(a_bootstrap, alpha=alpha)
    b_low, b_high = hdi(b_bootstrap, alpha=alpha)

    # Posterior of the difference between the two sets of resampled means.
    difference = np.array(b_bootstrap) - np.array(a_bootstrap)
    difference_mean = difference.mean()
    diff_low, diff_high = hdi(difference, alpha=alpha)

    # Share of the difference posterior that lies above zero.
    positive_count = (difference > 0).sum()
    share_positive = positive_count / difference.shape[0]

    return dict([
        ('a_ci_low', a_low),
        ('a_ci_hi', a_high),
        ('b_ci_low', b_low),
        ('b_ci_hi', b_high),
        ('diff_mean', difference_mean),
        ('diff_ci_low', diff_low),
        ('diff_ci_hi', diff_high),
        ('prob_b_>_a', share_positive),
    ])
def bootstrap():
    """Print and plot a bootstrap summary for column ``X`` of ``df``.

    ``X`` and ``df`` are read from the enclosing scope. The function prints
    the column name with its rounded mean, draws 10000 replications with
    ``bb.mean``, prints the rounded interval returned by
    ``bb.highest_density_interval``, and shows a distribution plot of the
    replications.
    """
    observed_mean = round(df[X].mean(), 2)
    print(X, observed_mean)

    player_posterior = bb.mean(df[X], n_replications=10000)
    lower, upper = bb.highest_density_interval(player_posterior)
    print('low ci:', round(lower, 2), 'high ci:', round(upper, 2))

    sns.distplot(player_posterior)
    plt.show()
    plt.close()
def plot_mean_bootstrap_exponential_readme():
    """Save a two-panel figure comparing two bootstraps of a mean.

    Four values are drawn from ``np.random.exponential(7, 4)``. The top panel
    shows 10000 samples from ``mean`` and the bottom panel shows 10000 means
    of ``resample``-d data. Each panel overlays the interval returned by
    ``highest_density_interval`` (labelled '95% HDI', which relies on the
    callee's default alpha) and shares the x-range [-1, 18]. The figure is
    written to 'readme_exponential.png'.
    """
    X = np.random.exponential(7, 4)

    # Keep this draw order: classical resamples first, then the posterior.
    classical_samples = [np.mean(resample(X)) for _ in range(10000)]
    posterior_samples = mean(X, 10000)

    posterior_interval = highest_density_interval(posterior_samples)
    classical_interval = highest_density_interval(classical_samples)

    panels = (
        (1, 'Bayesian Bootstrap of mean', posterior_samples,
         'Bayesian Bootstrap Samples', posterior_interval),
        (2, 'Classical Bootstrap of mean', classical_samples,
         'Classical Bootstrap Samples', classical_interval),
    )
    for row, title, draws, draws_label, (lower, upper) in panels:
        plt.subplot(2, 1, row)
        plt.title(title)
        sns.distplot(draws, label=draws_label)
        plt.plot([lower, upper], [0, 0], linewidth=5.0, marker='o', label='95% HDI')
        plt.xlim(-1, 18)
        plt.legend()

    plt.savefig('readme_exponential.png', bbox_inches='tight')
def run(self):
    """Stream samples, score change points, raise alarms, and plot the results.

    Samples are pulled from a fresh ``ExampleLoader`` until ``sample()``
    returns ``False``. The reference window is filled first, then the test
    window; during that warm-up a zero score row and a zero gamma are
    recorded per step. Afterwards each step slides both windows by one
    sample, computes ``self._re_sample_trial`` change point scores, and
    derives an interval for their mean. Gamma is the lower bound of the
    current interval minus the oldest stored upper bound. A positive gamma
    marks a change point, and a full run of consecutive positive steps
    raises an alarm.

    Three figures are shown at the end: the raw data, the change point score
    (mean +/- 2 std), and gamma, each with the alarms as red dashed lines.

    Raises:
        ValueError: if ``self._score_method`` is neither 'log likelihood'
            nor 'symmetrized kl' when scoring is first needed.
    """
    data_loader = ExampleLoader()
    sample = data_loader.sample()
    step_num = 0
    scatter_time = []

    while sample is not False:
        scatter_time.append([data_loader.data_index - 1] * len(sample))
        self._samples.append(sample)

        if len(self._reference) < self._reference.maxlen:
            # Wait until the reference window is full.
            self._reference.append(sample)
            self._change_point_score.append([0] * self._re_sample_trial)
            self._gamma.append(0)
        elif len(self._test) < self._test.maxlen:
            # Wait until the test window is full.
            self._test.append(sample)
            self._change_point_score.append([0] * self._re_sample_trial)
            self._gamma.append(0)
        else:
            # Resolve the scoring function once per step; an unknown method
            # fails loudly instead of continuing with an empty score list.
            if self._score_method == 'log likelihood':
                score_fn = calculate_log_likelihood_cp_score
            elif self._score_method == 'symmetrized kl':
                score_fn = calculate_kl_cp_score
            else:
                raise ValueError(
                    "Not allowed scoring method: {}. Accepted values for "
                    "score method: 'log likelihood' or 'symmetrized kl'."
                    .format(self._score_method))

            # Slide both windows forward by one sample.
            oldest_test_sample = self._test.popleft()
            self._reference.append(oldest_test_sample)
            self._test.append(sample)

            # The signature sets are rebuilt for every trial.
            trial_scores = []
            for _ in range(self._re_sample_trial):
                reference_signatures = define_signatures_set(self._reference)
                test_signatures = define_signatures_set(self._test)
                trial_scores.append(
                    score_fn(reference_signatures, test_signatures))

            # Interval for the mean change point score.
            # NOTE(review): `mean` is presumably a bootstrap of the mean with
            # 10000 replications - confirm against its definition.
            score_posterior = mean(trial_scores, 10000)
            test_low, test_up = highest_density_interval(score_posterior,
                                                         alpha=0.001)
            self._change_point_score.append(trial_scores)

            if len(self._xi_up) < self._xi_up.maxlen:
                # Not enough upper bounds stored yet to compare against.
                self._xi_up.append(test_up)
                self._gamma.append(0)
            else:
                gamma = test_low - self._xi_up.popleft()
                self._xi_up.append(test_up)
                self._gamma.append(gamma)

                if gamma > 0:
                    self._change_point_index.append(step_num)
                    self._continuous_cp_alarm.append(step_num)

                    alarms = self._continuous_cp_alarm
                    run_is_full = len(alarms) == alarms.maxlen
                    if run_is_full and alarms[0] == step_num - alarms.maxlen + 1:
                        already_reported = (
                            len(self._change_point_alarm_index) > 0
                            and self._change_point_alarm_index[-1] in alarms)
                        # A guarded append replaces the former `continue`,
                        # which would have skipped the sample/step advance
                        # below and reprocessed the same sample.
                        if not already_reported:
                            self._change_point_alarm_index.append(alarms[0])

        sample = data_loader.sample()
        step_num += 1

    # Drawing: raw data with the alarm positions.
    plt.figure(figsize=[15, 4])
    plt.scatter(scatter_time,
                self._samples,
                marker='o',
                s=10,
                c='black',
                alpha=0.1)
    plt.title('Data')
    plt.xlabel('Time')
    plt.ylabel('Y')
    plt.xlim([0, step_num + 2])
    plt.xticks(np.arange(0, step_num + 2, 50))
    for cp in self._change_point_alarm_index:
        # On the data plot the alarm line is shifted back by an offset
        # derived from self._tau that depends on the scoring method.
        if self._score_method == 'log likelihood':
            plt.axvline(x=cp - self._tau + 1, color='red', ls='--', lw=1)
        else:
            plt.axvline(x=cp - int(np.ceil(self._tau / 2)) + 1,
                        color='red',
                        ls='--',
                        lw=1)
    plt.show()

    # Change point score: per-step mean with a +/- 2 std band.
    plt.figure(figsize=[15, 4])
    mu = np.mean(self._change_point_score, axis=1)
    std = np.std(self._change_point_score, axis=1)
    plt.plot(mu, ls='--', c='black', alpha=1.0)
    plt.fill_between(np.arange(0, step_num),
                     mu - 2 * std,
                     mu + 2 * std,
                     color='black',
                     alpha=0.3)
    plt.title('Change Point Score')
    plt.xlabel('Time')
    plt.ylabel('Change Point Score')
    plt.xlim([0, step_num + 2])
    plt.xticks(np.arange(0, step_num + 2, 50))
    for cp in self._change_point_alarm_index:
        plt.axvline(x=cp, color='red', ls='--', lw=1)
    plt.show()

    # Gamma, with the zero threshold drawn in green.
    plt.figure(figsize=[15, 4])
    plt.plot(self._gamma, ls='--', c='black', alpha=1.0)
    plt.title('Gamma')
    plt.xlabel('Time')
    plt.ylabel('Gamma')
    plt.xlim([0, step_num + 2])
    plt.xticks(np.arange(0, step_num + 2, 50))
    plt.axhline(y=0, color='green', lw=2, ls='--')
    for cp in self._change_point_alarm_index:
        plt.axvline(x=cp, color='red', ls='--', lw=1)
    plt.show()
# Install the dependency first. This is a notebook magic, kept commented so
# the file still parses as plain Python:
# %pip install bayesian_bootstrap

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Four draws from an exponential distribution with scale 7.
X = np.random.exponential(7, 4)

# +
from bayesian_bootstrap.bootstrap import mean, highest_density_interval, BayesianBootstrapBagging

# Posterior samples of the mean (10000 replications) and their interval.
posterior_samples = mean(X, 10000)
l, r = highest_density_interval(posterior_samples)

plt.title('Bayesian Bootstrap of mean')
sns.distplot(posterior_samples, label='Bayesian Bootstrap Samples')
plt.plot([l, r], [0, 0], linewidth=5.0, marker='o', label='95% HDI')
# -

from bayesian_bootstrap.bootstrap import bayesian_bootstrap

# Same statistic through the generic entry point, with np.mean as the statistic.
posterior_samples = bayesian_bootstrap(X, np.mean, 10000, 100)

# Small regression example: five points with y = x + standard-normal noise.
X = np.random.normal(0, 1, 5).reshape(-1, 1)
y = X.reshape(1, -1).reshape(5) + np.random.normal(0, 1, 5)

# NOTE(review): LinearRegression is not imported in the visible part of this
# file - presumably sklearn.linear_model.LinearRegression; confirm and import.
m = BayesianBootstrapBagging(LinearRegression(), 10000, 1000)
m.fit(X, y)

import utils

# Load the pre/post data sets for each user group.
p_pre, p_post = utils.load_users_covid('politicians')
rfriends_pre, rfriends_post = utils.load_users_covid('random-friends')
rfollowers_pre, rfollowers_post = utils.load_users_covid('random-followers')