def test_it_throws_if_you_pass_in_a_proposal_listener_but_you_didnt_specify_the_proposal_type(
        net: BayesNet) -> None:
    """sample() must reject proposal_listeners when no proposal_distribution is given."""
    with pytest.raises(TypeError) as excinfo:
        sample(
            net=net,
            sample_from=net.get_latent_vertices(),
            proposal_listeners=[AcceptanceRateTracker()],
            drop=3)
    expected_message = "If you pass in proposal_listeners you must also specify proposal_distribution"
    assert str(excinfo.value) == expected_message
def test_sample_with_plot(net: BayesNet) -> None:
    """With a fixed seed, plotting during sampling draws one 5-point line per subplot row."""
    KeanuRandom.set_default_random_seed(1)
    num_rows = 3
    _, ax = plt.subplots(num_rows, 1, squeeze=False)
    sample(net=net, sample_from=net.get_latent_vertices(), draws=5, plot=True, ax=ax)
    reorder_subplots(ax)
    assert len(ax) == num_rows
    for row in range(num_rows):
        lines = ax[row][0].get_lines()
        assert len(lines) == 1
        assert len(lines[0].get_ydata()) == 5
def test_it_throws_if_you_pass_in_a_proposal_listener_but_the_algo_isnt_metropolis(
        net: BayesNet) -> None:
    """proposal_listeners is only valid with Metropolis Hastings; other algos raise."""
    with pytest.raises(TypeError) as excinfo:
        sample(
            net=net,
            sample_from=net.get_latent_vertices(),
            algo="hamiltonian",
            proposal_listeners=[AcceptanceRateTracker()],
            drop=3)
    expected_message = "Only Metropolis Hastings supports the proposal_listeners parameter"
    assert str(excinfo.value) == expected_message
def test_it_throws_if_you_pass_in_a_proposal_distribution_but_the_algo_isnt_metropolis(
        net: BayesNet) -> None:
    """proposal_distribution is only valid with Metropolis Hastings; other algos raise."""
    with pytest.raises(TypeError) as excinfo:
        sample(
            net=net,
            sample_from=net.get_latent_vertices(),
            algo="hamiltonian",
            proposal_distribution="prior",
            drop=3)
    expected_message = "Only Metropolis Hastings supports the proposal_distribution parameter"
    assert str(excinfo.value) == expected_message
def test_sample_with_plot(net: BayesNet) -> None:
    """Plotting during sampling yields exactly one 5-sample line per latent subplot."""
    num_plots = 3
    _, ax = plt.subplots(num_plots, 1, squeeze=False)
    sample(net=net, sample_from=net.iter_latent_vertices(), draws=5, plot=True, ax=ax)
    reorder_subplots(ax)
    assert len(ax) == num_plots
    for idx in range(num_plots):
        plotted = ax[idx][0].get_lines()
        assert len(plotted) == 1
        assert len(plotted[0].get_ydata()) == 5
def test_iter_returns_same_result_as_sample() -> None:
    """Streaming (generate_samples) and batch (sample) agree on each vertex's mean.

    Fix: `DataFrame.append` is deprecated and was removed in pandas 2.0. The
    streamed samples (one dict per draw) are now loaded into a DataFrame in a
    single construction, which is equivalent to the old ignore_index appends.
    """
    draws = 100
    model = thermometers.model()
    net = BayesNet(model.temperature.iter_connected_graph())

    # Batch run from a known starting state.
    set_starting_state(model)
    sampler = MetropolisHastingsSampler(
        proposal_distribution='prior', latents=net.iter_latent_vertices())
    samples = sample(
        net=net,
        sample_from=net.iter_latent_vertices(),
        sampling_algorithm=sampler,
        draws=draws)

    # Streaming run from the same starting state with a fresh sampler.
    set_starting_state(model)
    sampler = MetropolisHastingsSampler(
        proposal_distribution='prior', latents=net.iter_latent_vertices())
    iter_samples = generate_samples(
        net=net,
        sample_from=net.iter_latent_vertices(),
        sampling_algorithm=sampler)

    # One row per streamed draw, one column per vertex label.
    samples_dataframe = pd.DataFrame(list(islice(iter_samples, draws)))

    for vertex_label in samples_dataframe:
        np.testing.assert_almost_equal(samples_dataframe[vertex_label].mean(),
                                       np.average(samples[vertex_label]))
def test_dropping_samples(net: BayesNet) -> None:
    """The first `drop` samples are discarded from every vertex's trace."""
    draws, drop = 10, 3
    samples = sample(net=net, sample_from=net.get_latent_vertices(), draws=draws, drop=drop)
    expected_num_samples = draws - drop
    for _, vertex_samples in samples.items():
        assert len(vertex_samples) == expected_num_samples
def test_down_sample_interval(net: BayesNet) -> None:
    """Down-sampling keeps one sample per `down_sample_interval` draws.

    Fix: the expected count is a length, so compute it with integer division
    (`//`) rather than true division, which produced a float (5.0).
    """
    draws = 10
    down_sample_interval = 2
    samples = sample(
        net=net,
        sample_from=net.get_latent_vertices(),
        draws=draws,
        down_sample_interval=down_sample_interval)
    expected_num_samples = draws // down_sample_interval
    assert all(
        len(vertex_samples) == expected_num_samples
        for vertex_id, vertex_samples in samples.items())
def test_can_get_acceptance_rates(net: BayesNet) -> None:
    """AcceptanceRateTracker reports a rate in [0, 1] for every latent vertex."""
    tracker = AcceptanceRateTracker()
    latents = list(net.get_latent_vertices())
    sampler = MetropolisHastingsSampler(
        proposal_distribution='prior', proposal_listeners=[tracker])
    samples = sample(net=net, sample_from=latents, sampling_algorithm=sampler, drop=3)
    for latent in latents:
        rate = tracker.get_acceptance_rate(latent)
        assert 0 <= rate <= 1
def test_autocorr_returns_ndarray_of_correct_dtype() -> None:
    """stats.autocorrelation on a sampled trace returns a numpy ndarray."""
    with Model() as m:
        m.uniform = Uniform(0, 1000)
    net = m.to_bayes_net()
    samples = sample(net=net, sample_from=net.iter_latent_vertices(), draws=10)
    first_key = list(samples.keys())[0]
    trace = samples.get(first_key)
    assert trace is not None
    autocorr = stats.autocorrelation(trace)
    assert type(autocorr) == np.ndarray
def test_sample_throws_if_vertices_in_sample_from_are_missing_labels() -> None:
    """An unlabelled latent in sample_from makes sample() raise a ValueError."""
    sigma = Gamma(1., 1)
    vertex = Gaussian(0., sigma, label="gaussian")
    assert sigma.get_label() is None
    net = BayesNet([sigma, vertex])
    with pytest.raises(ValueError, match=r"Vertices in sample_from must be labelled."):
        sample(net=net, sample_from=net.iter_latent_vertices())
def test_sampling_returns_multi_indexed_dict_of_list_of_scalars_for_tensor_in_sample_from(
        algo: Callable[[BayesNet], PosteriorSamplingAlgorithm],
        tensor_net: BayesNet) -> None:
    """Tensor-valued latents still sample into a plain dict with valid per-draw entries."""
    draws = 5
    latents = list(tensor_net.iter_latent_vertices())
    result = sample(
        net=tensor_net,
        sample_from=latents,
        sampling_algorithm=algo(tensor_net),
        draws=draws)
    assert type(result) == dict
    __assert_valid_samples(draws, result)
def inference_example_hmc_nuts():
    """Documentation snippet: run Hamiltonian and NUTS sampling on a tiny Gaussian net.

    The %%SNIPPET%% comment markers delimit regions extracted into the docs —
    do not edit or move them.
    """
    with Model() as m:
        m.a = Gaussian(20., 1.)
        m.b = Gaussian(20., 1.)
        m.c = Gaussian(m.a + m.b, 1.)
    # Observe c and fix starting values for the latents before building the net.
    m.c.observe(43.)
    m.a.set_value(20.)
    m.b.set_value(20.)
    bayes_net = m.to_bayes_net()
    # %%SNIPPET_START%% PythonHamiltonianExample
    posterior_samples = sample(net=bayes_net,
                               sample_from=bayes_net.get_latent_vertices(),
                               algo="hamiltonian",
                               draws=2000)
    # %%SNIPPET_END%% PythonHamiltonianExample
    # %%SNIPPET_START%% PythonNUTSExample
    posterior_samples = sample(net=bayes_net,
                               sample_from=bayes_net.get_latent_vertices(),
                               algo="NUTS",
                               draws=2000)
def test_sampling_returns_dict_of_list_of_ndarrays_for_vertices_in_sample_from(
        algo: Callable[[BayesNet], PosteriorSamplingAlgorithm],
        net: BayesNet) -> None:
    """sample() returns one dict entry per requested latent, with valid traces."""
    draws = 5
    latents = list(net.iter_latent_vertices())
    result = sample(net=net, sample_from=latents, sampling_algorithm=algo(net), draws=draws)
    assert len(result) == len(latents)
    assert type(result) == dict
    __assert_valid_samples(draws, result)
def test_can_specify_nuts_params(net: BayesNet) -> None:
    """NUTSSampler accepts explicit tuning parameters and can drive sample()."""
    sampler = NUTSSampler(
        adapt_count=1000,
        target_acceptance_prob=0.65,
        adapt_step_size_enabled=True,
        adapt_potential_enabled=True,
        initial_step_size=0.1,
        max_tree_height=10)
    samples = sample(net, list(net.iter_latent_vertices()), sampler, draws=500, drop=100)
def autocorrelation_example_nd():
    """Documentation snippet: autocorrelation of a rank-2 (2x2) latent's samples.

    The %%SNIPPET%% comment markers delimit the region extracted into the docs —
    do not edit or move them.
    """
    with Model() as m:
        m.a = Gaussian(np.array([[20., 30.], [40., 60.]]),
                       np.array([[1., 1.], [1., 1.]]))
    bayes_net = m.to_bayes_net()
    # %%SNIPPET_START%% PythonNdAutocorrelation
    algo = MetropolisHastingsSampler()
    posterior_samples = sample(net=bayes_net,
                               sample_from=bayes_net.get_latent_vertices(),
                               sampling_algorithm=algo,
                               draws=100)
    vertex_samples = posterior_samples.get('a')
    # Autocorrelation is taken at index (0, 1) of the 2x2 tensor samples.
    ac = stats.autocorrelation(vertex_samples, (0, 1))
def test_sample_dict_can_be_loaded_in_to_dataframe(net: BayesNet) -> None:
    """A sample dict loads into pandas with one float64 column per latent label."""
    sample_from = list(net.iter_latent_vertices())
    labels = [vertex.get_label() for vertex in sample_from]
    samples = sample(net=net, sample_from=sample_from, draws=5)
    df = pd.DataFrame(samples)
    for column in df:
        assert df[column].name in labels
        assert len(df[column]) == 5
        assert type(df[column][0]) == np.float64
def test_iter_returns_same_result_as_sample(algo: str) -> None:
    """Streaming generate_samples matches batch sample() on each vertex's mean.

    Bug fix: the original built the dataframe with
    `[samples_dataframe.append(...) for ...]` — but DataFrame.append is not
    in-place (it returns a new frame, and was removed entirely in pandas 2.0),
    so every result was discarded, the dataframe stayed empty, and the final
    comparison loop never executed. The streamed draws (one dict per sample)
    are now loaded into the dataframe in a single construction.
    """
    draws = 100
    model = thermometers.model()
    net = BayesNet(model.temperature.get_connected_graph())

    # Batch run from a known starting state.
    set_starting_state(model)
    samples = sample(net=net, sample_from=net.get_latent_vertices(), algo=algo, draws=draws)

    # Streaming run from the same starting state.
    set_starting_state(model)
    iter_samples = generate_samples(net=net, sample_from=net.get_latent_vertices(), algo=algo)

    # One row per streamed draw, one column per vertex.
    samples_dataframe = pd.DataFrame(list(islice(iter_samples, draws)))

    for vertex_id in samples_dataframe:
        np.testing.assert_almost_equal(samples_dataframe[vertex_id].mean(),
                                       np.average(samples[vertex_id]))
def test_sample_with_plot(net: BayesNet) -> None:
    """With a fixed seed, the plotted traces match known y-values per subplot row."""
    KeanuRandom.set_default_random_seed(1)
    _, ax = plt.subplots(3, 1, squeeze=False)
    sample(net=net, sample_from=net.get_latent_vertices(), draws=5, plot=True, ax=ax)
    reorder_subplots(ax)
    assert len(ax) == 3
    for row in range(3):
        assert len(ax[row][0].get_lines()) == 1
    expected_traces = [
        [0.49147822, 0.49147822, 0.49147822, 0.20033212, 0.20033212],
        [0.87268333, 1.10409369, 1.10409369, 1.10409369, 0.69098161],
        [-14.46166855, -14.46166855, 0.32305686, 0.32305686, 0.32305686],
    ]
    for row, expected in enumerate(expected_traces):
        assert np.allclose(ax[row][0].get_lines()[0].get_ydata(), expected)
def test_multi_indexed_sample_dict_can_be_loaded_in_to_dataframe(
        tensor_net: BayesNet) -> None:
    """Tensor samples load into pandas under a MultiIndex of (label, element tuple)."""
    sample_from = list(tensor_net.iter_latent_vertices())
    labels = [vertex.get_label() for vertex in sample_from]
    samples = sample(net=tensor_net, sample_from=sample_from, draws=5)
    df = pd.DataFrame(samples)
    for parent in df.columns.levels[0]:
        assert parent in labels
        for child in df.columns.levels[1]:
            assert type(child) == tuple
            assert len(df[parent][child]) == 5
            assert type(df[parent][child][0]) == np.float64
def test_coalmining() -> None:
    """Coal-mining switchpoint inference recovers 1890 as the modal switch year."""
    coal_mining = CoalMining()
    model = coal_mining.model()
    model.disasters.observe(coal_mining.training_data())
    net = BayesNet(model.switchpoint.iter_connected_graph())
    samples = sample(
        net=net,
        sample_from=net.iter_latent_vertices(),
        draws=2000,
        drop=100,
        down_sample_interval=5)
    switchpoint_samples: List[primitive_types] = samples["switchpoint"]
    counts = np.bincount(np.array(switchpoint_samples))
    switch_year = np.argmax(counts)
    assert switch_year == 1890
def test_sampling_returns_multi_indexed_dict_of_list_of_scalars_for_mixed_net(
        algo: Callable[[BayesNet], PosteriorSamplingAlgorithm]) -> None:
    """A net mixing scalar, rank-2 and rank-3 latents samples into one multi-indexed dict."""
    # Build a net with one scalar latent feeding rank-2 and rank-3 Gaussians.
    exp = Exponential(1.)
    add_rank_2 = exp + np.array([1., 2., 3., 4.]).reshape((2, 2))
    add_rank_3 = exp + np.array([1., 2., 3., 4., 1., 2., 3., 4.]).reshape((2, 2, 2))
    gaussian_rank_2 = Gaussian(add_rank_2, 2.)
    gaussian_rank_3 = Gaussian(add_rank_3, 1.)
    exp.set_label("exp")
    gaussian_rank_2.set_label("gaussian")
    gaussian_rank_3.set_label("gaussian2")
    mixed_net = BayesNet(exp.iter_connected_graph())

    draws = 5
    sample_from = list(mixed_net.iter_latent_vertices())
    vertex_labels = [vertex.get_label() for vertex in sample_from]
    samples = sample(
        net=mixed_net,
        sample_from=sample_from,
        sampling_algorithm=algo(mixed_net),
        draws=draws)

    assert type(samples) == dict
    __assert_valid_samples(draws, samples)

    # Scalars and tensor elements are both keyed by (label, index-tuple).
    assert ('exp', (0, )) in samples
    for i in (0, 1):
        for j in (0, 1):
            assert ('gaussian', (i, j)) in samples

    df = pd.DataFrame(samples)
    expected_num_columns = {"exp": 1, "gaussian": 4, "gaussian2": 8}
    expected_tuple_size = {"exp": 1, "gaussian": 2, "gaussian2": 3}
    assert len(df.columns.levels[0]) == 3
    for parent_column in df.columns.levels[0]:
        assert parent_column in vertex_labels
        assert len(df[parent_column].columns) == expected_num_columns[parent_column]
        for child_column in df[parent_column].columns:
            assert type(child_column) == tuple
            assert len(child_column) == expected_tuple_size[parent_column]
            assert len(df[parent_column][child_column]) == 5
            assert type(df[parent_column][child_column][0]) == np.float64
def autocorrelation_example_scalar():
    """Documentation snippet: autocorrelation of a scalar latent's sample trace.

    The %%SNIPPET%% comment markers delimit the region extracted into the docs —
    do not edit or move them.
    """
    with Model() as m:
        m.a = Gaussian(20, 1.)
        m.b = Gaussian(20, 1.)
        m.c = Gaussian(m.a + m.b, 1.)
    # Observe c and fix starting values for the latents before building the net.
    m.c.observe(43.)
    m.a.set_value(20.)
    m.b.set_value(20.)
    bayes_net = m.to_bayes_net()
    # %%SNIPPET_START%% PythonScalarAutocorrelation
    algo = MetropolisHastingsSampler()
    posterior_samples = sample(net=bayes_net,
                               sample_from=bayes_net.get_latent_vertices(),
                               sampling_algorithm=algo,
                               draws=100)
    vertex_samples = posterior_samples.get('a')
    ac = stats.autocorrelation(vertex_samples)
def test_autocorrelation_same_for_streaming_as_batch() -> None:
    """Autocorrelation from streamed samples equals the batch-computed result.

    Fix: `DataFrame.append` is deprecated and was removed in pandas 2.0. The
    streamed draws (one dict per sample) are loaded into a DataFrame in one
    construction — each dict becomes a row, as the ignore_index appends did.
    """
    with Model() as model:
        model.uniform = Uniform(0, 1000)
    net = model.to_bayes_net()
    draws = 15

    # Batch run from a known starting state.
    set_starting_state(model)
    samples = sample(net=net, sample_from=net.get_latent_vertices(), algo="metropolis", draws=draws)

    # Streaming run from the same starting state.
    set_starting_state(model)
    iter_samples = generate_samples(net=net, sample_from=net.get_latent_vertices(), algo="metropolis")
    samples_dataframe = pd.DataFrame(list(islice(iter_samples, draws)))

    for vertex_id in samples_dataframe:
        autocorr_streaming = stats.autocorrelation(list(samples_dataframe[vertex_id].values))
        autocorr_batch = stats.autocorrelation(samples[vertex_id])
        np.testing.assert_array_equal(autocorr_batch, autocorr_streaming)
def test_coalmining() -> None:
    """With a fixed seed, coal-mining switchpoint inference finds 1890."""
    KeanuRandom.set_default_random_seed(1)
    coal_mining = CoalMining()
    model = coal_mining.model()
    model.disasters.observe(coal_mining.training_data())
    net = BayesNet(model.switchpoint.get_connected_graph())
    samples = sample(
        net=net,
        sample_from=net.get_latent_vertices(),
        draws=50000,
        drop=10000,
        down_sample_interval=5)
    vertex_samples: List[numpy_types] = samples["switchpoint"]
    # 0-d arrays can't be concatenated, so convert each to a python scalar first.
    as_primitives: List[List[primitive_types]] = list(
        map(lambda a: a.tolist(), vertex_samples))
    switch_year = np.argmax(np.bincount(np.array(as_primitives)))
    assert switch_year == 1890
def test_sampling_returns_dict_of_list_of_ndarrays_for_vertices_in_sample_from(algo: str, net: BayesNet) -> None:
    """Samples come back as {vertex_id: list of scalar float ndarrays} of length draws."""
    draws = 5
    sample_from = list(net.get_latent_vertices())
    vertex_ids = [vertex.get_id() for vertex in sample_from]
    samples = sample(net=net, sample_from=sample_from, algo=algo, draws=draws)
    assert len(samples) == len(vertex_ids)
    assert type(samples) == dict
    for vertex_id, trace in samples.items():
        assert vertex_id in vertex_ids
        assert type(trace) == list
        assert len(trace) == draws
        for draw in trace:
            assert type(draw) == np.ndarray
            assert draw.dtype == float
            assert draw.shape == ()
def test_sampling_returns_dict_of_list_of_ndarrays_for_vertices_in_sample_from(algo: PosteriorSamplingAlgorithm, net: BayesNet) -> None:
    """Samples come back as {label: list of scalar float ndarrays} of length draws."""
    draws = 5
    sample_from = list(net.get_latent_vertices())
    vertex_labels = [vertex.get_label() for vertex in sample_from]
    samples = sample(net=net, sample_from=sample_from, sampling_algorithm=algo, draws=draws)
    assert len(samples) == len(sample_from)
    assert type(samples) == dict
    for label, trace in samples.items():
        assert label in vertex_labels
        assert type(trace) == list
        assert len(trace) == draws
        for draw in trace:
            assert type(draw) == np.ndarray
            assert draw.dtype == float
            assert draw.shape == ()
def inference_example_metropolis():
    """Documentation snippet: Metropolis-Hastings inference; a + b should recover c = 43.

    The %%SNIPPET%% comment markers delimit the region extracted into the docs —
    do not edit or move them.
    """
    # %%SNIPPET_START%% PythonMetropolisExample
    with Model() as m:
        m.a = Gaussian(20., 1.)
        m.b = Gaussian(20., 1.)
        m.c = Gaussian(m.a + m.b, 1.)
    m.c.observe(43.)
    m.a.set_value(20.)
    m.b.set_value(20.)
    bayes_net = m.to_bayes_net()
    posterior_samples = sample(net=bayes_net,
                               sample_from=bayes_net.get_latent_vertices(),
                               algo="metropolis",
                               draws=100000)
    average_posterior_a = np.average(posterior_samples.get('a'))
    average_posterior_b = np.average(posterior_samples.get('b'))
    actual = average_posterior_a + average_posterior_b
def test_can_specify_nuts_params(net: BayesNet) -> None:
    """NUTSSampler can be built with explicit positional tuning values and run."""
    # NOTE(review): positional args — presumably (adapt_count, target_acceptance_prob,
    # adapt_enabled, initial_step_size, max_tree_height); confirm against NUTSSampler.
    sampler = NUTSSampler(1000, 0.65, True, 0.1, 10)
    samples = sample(net, list(net.get_latent_vertices()), sampler, draws=500, drop=100)