def test_random_world():
    """Exercise RandomWorld semantics on empty and populated worlds."""
    a, b, c = (bayesnet.BayesNetNode(i) for i in range(3))

    # A fresh world is falsy, empty, and copy/extend behave accordingly.
    world = random_world.RandomWorld()
    assert not world
    assert len(world) == 0
    assert len(world.copy()) == 0
    assert len(world.extend(a, 1)) == 1

    # Assigning one node makes the world truthy; keys are node indices.
    world[b] = 2
    assert len(world) == 1
    assert world[b] == 2
    assert world.items() == [(1, 2)]
    assert b in world
    assert a not in world
    assert world
    assert list(world) == [1]
    del world[b]
    assert b not in world

    # A world constructed from parallel node/value lists.
    nodes = [a, b, c]
    values = [True, False, 3]
    world = random_world.RandomWorld(nodes, values)
    assert set(world.keys()) == {0, 1, 2}
    assert set(world.values()) == {True, False, 3}
    assert world
    assert len(world) == 3
    for node, value in zip(nodes, values):
        assert node in world
        assert world[node] == value
    assert sorted(list(world)) == [0, 1, 2]
    assert str(world) == repr(world)
def test_inverse_sampler(self, proposal_size, evidence_index):
    """Train inverse networks on random data, then check that inverse-MCMC
    marginals are close to the exact (enumerated) marginals."""
    # 1. Compute inverse network structure for the given evidence nodes.
    evidence = sprinkler_net.evidence(evidence_index)
    evidence_nodes = [self.net.nodes_by_index[index]
                      for index in evidence.keys()]
    inverse_map = invert.compute_inverse_map(
        self.net, evidence_nodes, self.rng)
    # 2. Initialize inverses with uniform distributions
    trainer = train.Trainer(self.net, inverse_map, precompute_gibbs=False)
    # 3. Generate random data
    for _ in xrange(3):
        # Each training world assigns a uniformly chosen support value
        # to every node in the network.
        world = random_world.RandomWorld(
            range(self.net.node_count),
            [self.rng.choice(node.support)
             for node in self.net.nodes_by_index]
        )
        trainer.observe(world)
    trainer.finalize()
    # 4. Compute true answer
    enumerator = exact_inference.Enumerator(self.net, evidence)
    true_marginals = enumerator.marginals()
    # 5. Compute answer using inverse sampling
    num_samples = 50000
    test_sampler = mcmc.InverseChain(
        self.net, inverse_map, self.rng, evidence, proposal_size)
    test_sampler.initialize_state()
    inverse_marginals = test_sampler.marginals(num_samples)
    # NOTE(review): assumes the marginals difference compares as a single
    # scalar-like value; elsewhere in this file `.mean()` is applied to a
    # marginals difference before comparing — confirm `<` does not yield
    # an element-wise (array) result here.
    assert true_marginals - inverse_marginals < .02
def test_abstract_bayesnet_node():
    """The abstract node base class must reject sampling and scoring."""
    abstract_node = bayesnet.BayesNetNode(index=0, name="Foo")
    empty_world = random_world.RandomWorld()
    # Both abstract operations raise NotImplementedError.
    for unimplemented_call in (
            lambda: abstract_node.sample(empty_world),
            lambda: abstract_node.log_probability(empty_world, 1)):
        with pytest.raises(NotImplementedError):
            unimplemented_call()
def gibbs_probabilities(node, world):
    """Given values for Markov blanket of node, compute Gibbs probabilities.

    Args:
      node: a BayesNetNode
      world: a random world that includes all nodes in the Markov blanket
        of the node of interest

    Returns:
      a list of (unnormalized) probabilities, one for each value in the
      support of node.
    """
    # Co-parents: parents of node's children, excluding node itself.
    # (Set comprehension replaces the original append-loop + set() pass.)
    coparents = {parent
                 for child in node.children
                 for parent in child.parents
                 if parent != node}
    coparent_world = random_world.RandomWorld(
        coparents, [world[coparent] for coparent in coparents])
    gibbs_probs = []
    for value in node.support:
        node_logprob = node.log_probability(world, value)
        # Re-score the children with the candidate value plugged in.
        coparent_world[node] = value
        children_logprob = sum(
            child.log_probability(coparent_world, world[child])
            for child in node.children)
        gibbs_probs.append(math.exp(node_logprob + children_logprob))
    return gibbs_probs
def test_enumerate(self):
    """Check that computed marginal probability is correct."""
    rain = self.net.find_node("Rain")
    grass = self.net.find_node("Grass")
    # Condition on observed grass, then marginalize out Rain exactly.
    observed = random_world.RandomWorld([grass], [1])
    enumerator = exact_inference.Enumerator(self.net, observed)
    rain_dist = enumerator.marginalize_node(rain)
    np.testing.assert_almost_equal(rain_dist[1], 0.24277141298417898)
def transition(self):
    """Rejection-sample from the prior until the sample matches evidence."""
    while True:
        candidate = self.net.sample(random_world.RandomWorld())
        # Accept only if every evidence node has its observed value.
        if all(candidate[node] == value
               for (node, value) in self.evidence.items()):
            break
    self.state = candidate
def sample(self, world=None, mutate_world=False):
    """Sample a random world, potentially based on existing world.

    Args:
      world: optional RandomWorld; any values already set are kept fixed
        and only the remaining nodes are sampled.
      mutate_world: if True, fill in the given world in place; otherwise
        sample into a copy.

    Returns:
      a RandomWorld with a value for every node.
    """
    # Bug fix: compare against None explicitly. The previous `if world:`
    # treated an *empty* world as absent, so mutate_world=True was
    # silently ignored when an empty world was passed in.
    if world is not None:
        if not mutate_world:
            world = world.copy()
    else:
        world = random_world.RandomWorld()
    for node in self.nodes_by_topology:
        if node not in world:
            # Write through .data, keyed by node index (the world's
            # underlying storage).
            world.data[node.index] = node.sample(world)
    return world
def test_nodes(self):
    """Check sampling and probability functions of nodes."""
    # P(sprinkler=1 | rain=1) is expected to be 0.4.
    rain_world = random_world.RandomWorld(
        keys=[self.rain_node], values=[1])
    np.testing.assert_almost_equal(
        math.exp(self.sprinkler_node.log_probability(rain_world, 1)), 0.4)
    # The empirical sampling frequency should agree with that probability.
    sprinkler_count = sum(
        1 for _ in xrange(self.n)
        if self.sprinkler_node.sample(rain_world) == 1)
    utils.assert_in_interval(sprinkler_count, .4, self.n, .95)
    # P(grass=1 | rain=0, sprinkler=1) is expected to be 0.7.
    rain_sprinkler_world = random_world.RandomWorld(
        keys=[self.rain_node, self.sprinkler_node], values=[0, 1])
    np.testing.assert_almost_equal(
        math.exp(self.grass_node.log_probability(rain_sprinkler_world, 1)),
        0.7)
def test_network(self):
    """Test a simple Bayesian network."""
    # Sampling from the prior: the test network is expected to produce
    # these exact values and total log probability.
    prior_world = self.net.sample()
    assert prior_world[self.node_1] == 1
    assert prior_world[self.node_2] == 2
    assert self.net.log_probability(prior_world) == utils.LOG_PROB_1
    # With node 1 fixed to 0, the downstream value and score change.
    fixed_world = self.net.sample(
        random_world.RandomWorld([self.node_1], [0]))
    assert fixed_world[self.node_2] == 1
    assert self.net.log_probability(fixed_world) == utils.LOG_PROB_0
def test_string_import(self):
    """Check that probabilities for imported Bayes net are as expected."""
    for network_string in (NETWORK_STRING_A, NETWORK_STRING_B):
        net = uai_import.network_from_string(
            network_string, utils.RandomState())
        assert len(net.sample()) == 3
        # Every tabulated assignment must score to its known probability.
        for (values, prob) in NETWORK_PROBABILITIES:
            world = random_world.RandomWorld(
                keys=net.nodes_by_index, values=values)
            log_prob = net.log_probability(world)
            np.testing.assert_almost_equal(log_prob, utils.safe_log(prob))
def test_marg_counter():
    """MarginalCounter averages observed worlds into per-node marginals."""
    rng = utils.RandomState(seed=0)
    net = sprinkler_net.get(rng)
    counter = marg.MarginalCounter(net)
    # Asking for marginals before any observation must fail.
    with pytest.raises(AssertionError):
        counter.marginals()
    for values in ([0, 0, 0], [0, 0, 1], [0, 1, 1]):
        counter.observe(random_world.RandomWorld([0, 1, 2], values))
    assert counter.num_observations == 3
    marginals = counter.marginals()
    # Expected frequencies of value 0 and value 1 per node index.
    expected = [
        (0, 1.0, 0.0),
        (1, 2 / 3, 1 / 3),
        (2, 1 / 3, 2 / 3),
    ]
    for index, p_zero, p_one in expected:
        np.testing.assert_almost_equal(marginals[index][0], p_zero)
        np.testing.assert_almost_equal(marginals[index][1], p_one)
def run_sprinkler(self, chain_class): """Check that inference result is close to truth.""" grass_node = self.net.find_node("Grass") rain_node = self.net.find_node("Rain") evidence = random_world.RandomWorld([grass_node], [True]) chain = chain_class(self.net, self.rng, evidence) chain.initialize_state() rain_count = 0 num_samples = 100000 for _ in xrange(num_samples): chain.transition() if chain.state[rain_node]: rain_count += 1 enumerator = exact_inference.Enumerator(self.net, evidence) exact_dist = enumerator.marginalize_node(rain_node) rain_prob = exact_dist[True] print rain_prob, rain_count utils.assert_in_interval(rain_count, rain_prob, num_samples, .95)
def test(self, evidence_index, precompute_gibbs):
    """Train inverse networks from prior samples; validate their marginals.

    Phase 1: check that each trained inverse network's conditional for the
    evidence nodes (given an empty world) matches the exact prior
    marginals. Phase 2: check that unconditional samples from each inverse
    network reproduce the prior marginals of all nodes.
    """
    evidence = sprinkler_net.evidence(evidence_index)
    evidence_nodes = [self.net.nodes_by_index[index]
                      for index in evidence.keys()]
    inverse_map = invert.compute_inverse_map(
        self.net, evidence_nodes, self.rng)
    assert len(inverse_map) == 2
    # Train on samples from the prior (no evidence applied).
    trainer = train.Trainer(self.net, inverse_map, precompute_gibbs)
    num_samples = 30000
    for _ in xrange(num_samples):
        sample = self.net.sample()
        trainer.observe(sample)
    trainer.finalize()
    # Compare marginal log probability for evidence node with prior marginals.
    empty_world = random_world.RandomWorld()
    enumerator = exact_inference.Enumerator(self.net, empty_world)
    exact_marginals = enumerator.marginals()
    for evidence_node in evidence_nodes:
        for value in [0, 1]:
            log_prob_true = math.log(
                exact_marginals[evidence_node.index][value])
            for inverse_net in inverse_map.values():
                log_prob_empirical = inverse_net.nodes_by_index[
                    evidence_node.index].log_probability(empty_world, value)
                print abs(log_prob_true - log_prob_empirical)
                assert abs(log_prob_true - log_prob_empirical) < .02
    # For each inverse network, take unconditional samples, compare
    # marginals to prior network.
    num_samples = 30000
    for inverse_net in inverse_map.values():
        # counts[node_index][value] accumulates sampled value frequencies.
        counts = [[0, 0], [0, 0], [0, 0]]
        for _ in xrange(num_samples):
            world = inverse_net.sample()
            for (index, value) in world.items():
                counts[index][value] += 1
        for index in [0, 1, 2]:
            true_dist = enumerator.marginalize_node(
                self.net.nodes_by_index[index])
            empirical_dist = utils.normalize(counts[index])
            for (p_true, p_empirical) in zip(true_dist, empirical_dist):
                print abs(p_true - p_empirical)
                assert abs(p_true - p_empirical) < .02
def test_gibbs_learner():
    """Verify that Gibbs learner makes same predictions as enumerator."""
    num_samples = 10000
    rng = utils.RandomState(seed=0)
    net = sprinkler_net.get(rng)
    for node in net.nodes_by_index:
        learner = learn.GibbsLearner(node, rng)
        learner.finalize()
        blanket_indices = [n.index for n in node.markov_blanket]
        # Enumerate every binary assignment to the node's Markov blanket.
        for blanket_values in utils.lexicographic_combinations(
                [[0, 1]] * len(node.markov_blanket)):
            world = random_world.RandomWorld(blanket_indices, blanket_values)
            probabilities = exact_inference.Enumerator(
                net, world).marginalize_node(node)
            check_scorer(
                lambda v: learner.log_probability(blanket_values, v),
                probabilities)
            check_sampler(
                lambda: learner.sample(blanket_values),
                probabilities, num_samples)
def train_from_gibbs_prior(self, trainers, num_samples):
    """Run Gibbs training starting from an empty (unconstrained) world."""
    empty_world = random_world.RandomWorld()
    self.train_from_gibbs(trainers, num_samples, empty_world)
def test_logistic_regression_mcmc(learner_class_index=0, seed=0):
    """Train inverse proposals for a fixed wall-clock budget, then measure
    inverse-MCMC marginal error over a fixed test budget.

    Args:
      learner_class_index: selects the learner (0: logistic regression with
        identity features, 1: logistic regression with squared features,
        2: count learner).
      seed: seed for the random state.

    Returns:
      tuple (num_proposals, num_proposals_accepted,
             time-integrated error divided by test seconds, final_error).
    """
    max_inverse_size = 30
    train_seconds = 2 * 60
    test_seconds = 60
    rng = utils.RandomState(seed=seed)
    net = triangle_net.get(rng)
    evidence = triangle_net.evidence(0)
    marginals = triangle_net.marginals(0)
    evidence_nodes = [net.nodes_by_index[index] for index in evidence.keys()]
    learner_classes = [
        lambda support, rng: learn.LogisticRegressionLearner(
            support, rng, transform_inputs=learn.identity_transformer),
        lambda support, rng: learn.LogisticRegressionLearner(
            support, rng, transform_inputs=learn.square_transformer),
        learn.CountLearner
    ]
    learner_class = learner_classes[learner_class_index]
    num_latent_nodes = len(net.nodes()) - len(evidence_nodes)
    print "Inverting network..."
    inverse_map = invert.compute_inverse_map(net, evidence_nodes, rng,
                                             max_inverse_size)
    train_start_time = datetime.datetime.now()
    print "Initializing trainer..."
    trainer = train.Trainer(net, inverse_map, False,
                            learner_class=learner_class)
    print "Training..."
    # Train for a fixed wall-clock budget on prior samples.
    sample = random_world.RandomWorld()
    while ((datetime.datetime.now() - train_start_time).total_seconds()
           < train_seconds):
        sample = net.sample(sample)  # Prior!
        trainer.observe(sample)
        # Clear the world so the next iteration resamples all nodes.
        sample.data = {}
    trainer.finalize()
    print "Testing..."
    test_sampler = mcmc.InverseChain(net, inverse_map, rng, evidence,
                                     proposal_size=max_inverse_size)
    test_sampler.initialize_state()
    error_integrator = utils.TemporalIntegrator()
    test_start_time = datetime.datetime.now()
    counter = marg.MarginalCounter(net)
    i = 0
    num_proposals_accepted = 0
    # Run the chain for a fixed test budget, tracking mean marginal error
    # every 100 transitions.
    while ((datetime.datetime.now() - test_start_time).total_seconds()
           < test_seconds):
        accept = test_sampler.transition()
        num_proposals_accepted += accept
        counter.observe(test_sampler.state)
        i += 1
        if i % 100 == 0:
            error = (marginals - counter.marginals()).mean()
            error_integrator.observe(error)
    final_time = datetime.datetime.now()
    empirical_test_seconds = (final_time - test_start_time).total_seconds()
    final_error = (marginals - counter.marginals()).mean()
    error_integrator.observe(final_error)
    num_proposals = i * num_latent_nodes
    return (num_proposals, num_proposals_accepted,
            error_integrator.integral / empirical_test_seconds, final_error)