Example #1
0
def test_lexicographic_combinations():
    """Test ordered combinations.

    Each mapping pairs a list of domains with the expected list of
    lexicographically ordered value combinations. An empty domain list
    yields one empty combination; any empty domain yields no combinations.
    """
    # Renamed loop variables: `input` shadowed the builtin of the same name.
    mappings = (
        ([], [[]]),
        ([[], [1]], []),
        ([[1], []], []),
        ([[1], [2]], [[1, 2]]),
        ([[1], [2, 3]], [[1, 2], [1, 3]]),
        ([[1, 3], [2]], [[1, 2], [3, 2]]),
    )
    for (domains, expected) in mappings:
        assert list(utils.lexicographic_combinations(domains)) == expected
Example #2
0
class TestSprinklerBayesNet(object):
  """Test training on a three-node rain/sprinkler/grass network."""

  def setup(self):
    # Fixed seed keeps sampled training data deterministic across runs.
    self.rng = utils.RandomState(seed=0)
    self.net = sprinkler_net.get(self.rng)

  @pytest.mark.slow
  @pytest.mark.parametrize(
    "evidence_index,precompute_gibbs",
    utils.lexicographic_combinations([[0, 1, 2, 3], [True, False]]))
  def test(self, evidence_index, precompute_gibbs):
    """Train inverse networks from prior samples; check learned marginals.

    Args:
      evidence_index: which sprinkler-net evidence configuration (0-3)
        to condition the inverse map on.
      precompute_gibbs: whether the trainer precomputes Gibbs
        distributions.
    """
    evidence = sprinkler_net.evidence(evidence_index)
    evidence_nodes = [self.net.nodes_by_index[index]
                      for index in evidence.keys()]
    inverse_map = invert.compute_inverse_map(
      self.net, evidence_nodes, self.rng)
    # NOTE(review): expects exactly two inverse networks for this
    # evidence setup — confirm against invert.compute_inverse_map.
    assert len(inverse_map) == 2
    trainer = train.Trainer(self.net, inverse_map, precompute_gibbs)
    num_samples = 30000
    # Train the inverses on unconditional samples from the prior.
    for _ in xrange(num_samples):
      sample = self.net.sample()
      trainer.observe(sample)
    trainer.finalize()

    # Compare marginal log probability for evidence node with prior marginals.
    empty_world = random_world.RandomWorld()
    enumerator = exact_inference.Enumerator(self.net, empty_world)
    exact_marginals = enumerator.marginals()
    for evidence_node in evidence_nodes:
      for value in [0, 1]:
        log_prob_true = math.log(exact_marginals[evidence_node.index][value])
        for inverse_net in inverse_map.values():
          log_prob_empirical = inverse_net.nodes_by_index[
            evidence_node.index].log_probability(empty_world, value)
          print abs(log_prob_true - log_prob_empirical)
          assert abs(log_prob_true - log_prob_empirical) < .02

    # For each inverse network, take unconditional samples, compare
    # marginals to prior network.
    num_samples = 30000
    for inverse_net in inverse_map.values():
      # counts[node_index][value] accumulates sampled value frequencies
      # for the three binary nodes.
      counts = [[0, 0], [0, 0], [0, 0]]
      for _ in xrange(num_samples):
        world = inverse_net.sample()
        for (index, value) in world.items():
          counts[index][value] += 1
      for index in [0, 1, 2]:
        true_dist = enumerator.marginalize_node(
          self.net.nodes_by_index[index])
        empirical_dist = utils.normalize(counts[index])
        for (p_true, p_empirical) in zip(true_dist, empirical_dist):
          print abs(p_true - p_empirical)
          assert abs(p_true - p_empirical) < .02
Example #3
0
  def test(self, training_source):
    """Compare dist on final nodes with and without Gibbs precomputation.

    Args:
      training_source: one of "prior", "gibbs-prior", or
        "gibbs-posterior"; selects which sampler supplies training data.
    """
    evidence_nodes = [self.net.nodes_by_index[index]
                      for index in self.evidence.keys()]

    # Set up two trainers, one with Gibbs precomputation, one without.
    # Both use max_inverse_size=1 so the inverse structures match.
    print "Computing inverse maps..."
    inverse_map_with_gibbs = invert.compute_inverse_map(
      self.net, evidence_nodes, self.rng, max_inverse_size=1)
    inverse_map_without_gibbs = invert.compute_inverse_map(
      self.net, evidence_nodes, self.rng, max_inverse_size=1)
    trainer_with_gibbs = train.Trainer(
      self.net, inverse_map_with_gibbs, precompute_gibbs=True)
    trainer_without_gibbs = train.Trainer(
      self.net, inverse_map_without_gibbs, precompute_gibbs=False)
    trainers = [trainer_with_gibbs, trainer_without_gibbs]

    # Train based on sampled data.
    print "Training..."
    num_samples = 50000
    if training_source == "prior":
      self.train_from_prior(trainers, num_samples)
    elif training_source == "gibbs-prior":
      self.train_from_gibbs_prior(trainers, num_samples)
    elif training_source == "gibbs-posterior":
      self.train_from_gibbs_posterior(trainers, num_samples)      
    else:
      raise ValueError("Unknown training source.")

    # Go through all nodes, check that all estimated conditional
    # distributions are close to true distributions.
    print "Comparing distributions..."
    error = 0.0
    num_checks = 0
    for node in inverse_map_with_gibbs.keys():
      net_with_gibbs = inverse_map_with_gibbs.get_net(node)
      net_without_gibbs = inverse_map_without_gibbs.get_net(node)
      # Precomputed-Gibbs distribution is treated as the true reference.
      gibbs_dist = net_with_gibbs.nodes_by_index[node.index].distribution
      estimated_dist = net_without_gibbs.nodes_by_index[node.index].distribution
      # Enumerate all binary Markov-blanket assignments for the node.
      for markov_blanket_values in utils.lexicographic_combinations(
          [[0, 1]] * len(node.markov_blanket)):
        for value in node.support:
          true_value = math.exp(
            gibbs_dist.log_probability(markov_blanket_values, value))
          estimated_value = math.exp(
            estimated_dist.log_probability(markov_blanket_values, value))
          error += abs(true_value - estimated_value)
          num_checks += 1
    # Mean absolute probability difference over all (assignment, value)
    # pairs must be small.
    average_error = error / num_checks
    print average_error
    assert average_error < .05
Example #4
0
def test_gibbs_learner():
  """Verify that Gibbs learner makes same predictions as enumerator."""
  sample_count = 10000
  rand = utils.RandomState(seed=0)
  bayes_net = sprinkler_net.get(rand)
  for node in bayes_net.nodes_by_index:
    gibbs_learner = learn.GibbsLearner(node, rand)
    gibbs_learner.finalize()
    blanket_indices = [neighbor.index for neighbor in node.markov_blanket]
    blanket_domains = [[0, 1]] * len(node.markov_blanket)
    for blanket_values in utils.lexicographic_combinations(blanket_domains):
      # Condition exact inference on this Markov-blanket assignment and
      # compare the learner's scorer and sampler against it.
      conditioning_world = random_world.RandomWorld(
        blanket_indices, blanket_values)
      true_probabilities = exact_inference.Enumerator(
        bayes_net, conditioning_world).marginalize_node(node)
      check_scorer(
        lambda v: gibbs_learner.log_probability(blanket_values, v),
        true_probabilities)
      check_sampler(
        lambda: gibbs_learner.sample(blanket_values),
        true_probabilities, sample_count)
Example #5
0
class TestInverseChain(object):
  """Tests for inverse-chain MCMC sampling on the sprinkler network."""

  def setup(self):
    # Fixed seed for reproducible sampling.
    self.rng = utils.RandomState(seed=0)
    self.net = sprinkler_net.get(self.rng)

  @pytest.mark.slow
  @pytest.mark.parametrize(
    ("proposal_size", "evidence_index"),
    utils.lexicographic_combinations([[1, 2, 3], [0, 1, 2, 3]]))
  def test_inverse_sampler(self, proposal_size, evidence_index):
    """Check inverse-chain marginals against exact enumeration.

    Args:
      proposal_size: inverse-chain proposal block size (1-3).
      evidence_index: which sprinkler-net evidence configuration (0-3).
    """
    evidence = sprinkler_net.evidence(evidence_index)
    evidence_nodes = [self.net.nodes_by_index[index]
                      for index in evidence.keys()]
    # 1. Compute inverse networks for the evidence nodes
    inverse_map = invert.compute_inverse_map(
      self.net, evidence_nodes, self.rng)
    # 2. Initialize inverses with uniform distributions
    trainer = train.Trainer(self.net, inverse_map, precompute_gibbs=False)
    # 3. Generate random data
    for _ in xrange(3):
      world = random_world.RandomWorld(
        range(self.net.node_count),
        [self.rng.choice(node.support)
         for node in self.net.nodes_by_index]
      )
      trainer.observe(world)
    trainer.finalize()
    # 4. Compute true answer
    enumerator = exact_inference.Enumerator(self.net, evidence)
    true_marginals = enumerator.marginals()
    # 5. Compute answer using inverse sampling
    num_samples = 50000
    test_sampler = mcmc.InverseChain(
      self.net, inverse_map, self.rng, evidence, proposal_size)
    test_sampler.initialize_state()
    inverse_marginals = test_sampler.marginals(num_samples)
    # NOTE(review): assumes the marginals objects support subtraction and
    # comparison against a scalar threshold — confirm against marg module.
    assert true_marginals - inverse_marginals < .02
Example #6
0
class TestTriangleNetwork(object):
    """Gibbs and inverse-chain sampling tests on a triangle network."""

    def setup(self, determinism=95, seed=None):
        """Build the network, evidence, and reference marginals.

        Args:
          determinism: passed through to triangle_net; presumably controls
            how deterministic the CPTs are — TODO confirm.
          seed: RNG seed; defaults to 0 for reproducibility.
        """
        seed = 0 if seed is None else seed
        self.rng = utils.RandomState(seed)
        self.net = triangle_net.get(self.rng, determinism)
        self.evidence = triangle_net.evidence(0, determinism)
        self.evidence_nodes = [
            self.net.nodes_by_index[index] for index in self.evidence.keys()
        ]
        # Latent nodes are all nodes that are not evidence.
        self.num_latent_nodes = len(self.net.nodes()) - len(
            self.evidence_nodes)
        # Precomputed true marginals used as ground truth in all tests.
        self.marginals = triangle_net.marginals(0, determinism)

    def test_gibbs(self):
        """Check that plain Gibbs sampling estimates marginals accurately."""
        num_test_samples = 5000
        gibbs_chain = mcmc.GibbsChain(self.net, self.rng, self.evidence)
        gibbs_chain.initialize_state()
        gibbs_marginals = gibbs_chain.marginals(num_test_samples)
        # NOTE(review): assumes marginal objects support elementwise
        # subtraction and .mean() — confirm against the marg module.
        gibbs_error = (self.marginals - gibbs_marginals).mean()
        print "Error (Gibbs): {}".format(gibbs_error)
        assert gibbs_error < .05

    def train_inverses(self, inverse_map, num_training_samples,
                       precompute_gibbs):
        """Train inverse networks on Gibbs samples; return training error.

        The returned error measures the Gibbs training chain itself
        (observed marginals vs. true marginals), not the trained inverses.
        """
        trainer = train.Trainer(self.net, inverse_map, precompute_gibbs)
        training_sampler = mcmc.GibbsChain(self.net, self.rng, self.evidence)
        training_sampler.initialize_state()
        counter = marg.MarginalCounter(self.net)
        for _ in xrange(num_training_samples):
            training_sampler.transition()
            trainer.observe(training_sampler.state)
            counter.observe(training_sampler.state)
        trainer.finalize()
        training_error = (self.marginals - counter.marginals()).mean()
        return training_error

    def check_inverses_by_samples(self, inverse_map, max_inverse_size,
                                  num_test_samples):
        """Run a fixed number of inverse-chain transitions.

        Returns:
          (mean marginal error, number of accepted proposals).
        """
        test_sampler = mcmc.InverseChain(self.net, inverse_map, self.rng,
                                         self.evidence, max_inverse_size)
        test_sampler.initialize_state()
        counter = marg.MarginalCounter(self.net)
        num_proposals_accepted = 0
        for _ in xrange(num_test_samples):
            # NOTE(review): transition() appears to return an acceptance
            # count/flag that is summed here — confirm against mcmc.
            accept = test_sampler.transition()
            counter.observe(test_sampler.state)
            num_proposals_accepted += accept
        inverse_marginals = counter.marginals()
        inverses_error = (self.marginals - inverse_marginals).mean()
        return inverses_error, num_proposals_accepted

    def check_inverses_by_time(self, inverse_map, max_inverse_size,
                               test_seconds):
        """Run inverse-chain transitions for a wall-clock time budget.

        Returns:
          (mean marginal error, proposals made, proposals accepted).
        """
        test_sampler = mcmc.InverseChain(self.net, inverse_map, self.rng,
                                         self.evidence, max_inverse_size)
        test_sampler.initialize_state()
        counter = marg.MarginalCounter(self.net)
        num_proposals_accepted = 0
        start_time = datetime.datetime.now()
        num_proposals = 0
        while (datetime.datetime.now() - start_time).seconds < test_seconds:
            accept = test_sampler.transition()
            counter.observe(test_sampler.state)
            num_proposals_accepted += accept
            # Each transition makes one proposal per latent node (this is
            # the same accounting used by test_inverses_error below).
            num_proposals += self.num_latent_nodes
        inverse_marginals = counter.marginals()
        inverses_error = (self.marginals - inverse_marginals).mean()
        return inverses_error, num_proposals, num_proposals_accepted

    @pytest.mark.slow
    @pytest.mark.parametrize("precompute_gibbs,max_inverse_size",
                             utils.lexicographic_combinations([[True, False],
                                                               [1, 2]]))
    def test_inverses_error(self, precompute_gibbs, max_inverse_size):
        """Verify that error in estimated inverse marginals is low."""
        num_training_samples = 50000
        num_test_samples = 10000

        print "Computing inverse nets..."
        inverse_map = invert.compute_inverse_map(self.net, self.evidence_nodes,
                                                 self.rng, max_inverse_size)

        print "Training on Gibbs samples..."
        training_error = self.train_inverses(inverse_map, num_training_samples,
                                             precompute_gibbs)
        print "Error (training): {}".format(training_error)
        assert training_error < .01

        print "Testing (inverses)..."
        test_error, num_proposals_accepted = self.check_inverses_by_samples(
            inverse_map, max_inverse_size, num_test_samples)
        print "Error (inverses): {}".format(test_error)
        assert test_error < .03

        # One proposal per latent node per test sample.
        num_proposals = num_test_samples * self.num_latent_nodes
        print "Accepted {} out of {} proposals".format(num_proposals_accepted,
                                                       num_proposals)

        if max_inverse_size == 1 and precompute_gibbs:
            # Check that all proposals are accepted.
            assert num_proposals_accepted == num_proposals

    @pytest.mark.slow
    def test_inverses_performance(self):
        """Verify that bigger proposal sizes can result in better performance.

        This test uses clock time, not number of samples, to decide how
        many test samples to take.
        """
        num_training_samples = 50000
        test_seconds = 10
        precompute_gibbs = True
        max_inverse_size = 8

        print "Computing inverse nets..."
        inverse_map = invert.compute_inverse_map(self.net, self.evidence_nodes,
                                                 self.rng, max_inverse_size)

        print "Training on Gibbs samples..."
        training_error = self.train_inverses(inverse_map, num_training_samples,
                                             precompute_gibbs)
        print "Error (training): {}".format(training_error)
        assert training_error < .01

        # No assertion on test error here: this loop reports error and
        # acceptance rate for each proposal size for manual inspection.
        print "Testing (inverses)..."
        for inverse_size in range(1, max_inverse_size + 1):
            test_error, num_proposals, num_accepted = self.check_inverses_by_time(
                inverse_map, inverse_size, test_seconds)
            print "Error (inverses, max inverse size {}): {}".format(
                inverse_size, test_error)
            print "Accepted {} out of {} proposals\n".format(
                num_accepted, num_proposals)

    def profile_inverses(self):
        """Time the compute/train/test phases (manual profiling helper).

        Not named test_* so pytest does not collect it.
        """
        num_training_samples = 5000
        test_seconds = 10
        precompute_gibbs = False
        max_inverse_size = 3
        start_time = datetime.datetime.now()
        inverse_map = invert.compute_inverse_map(self.net, self.evidence_nodes,
                                                 self.rng, max_inverse_size)
        t1 = datetime.datetime.now()
        print "Time to compute inverse map: {}".format(t1 - start_time)
        self.train_inverses(inverse_map, num_training_samples,
                            precompute_gibbs)
        t2 = datetime.datetime.now()
        print "Time to train inverses: {}".format(t2 - t1)
        self.check_inverses_by_time(inverse_map, max_inverse_size,
                                    test_seconds)
        t3 = datetime.datetime.now()
        print "Time for test sampling: {}".format(t3 - t2)
Example #7
0
def reorder_cpt(old_order, old_domain_sizes, old_probs, new_order):
  """Return a CPT permuted to match a new variable ordering.

  Args:
    old_order: list of node indices in the original CPT ordering
    old_domain_sizes: domain size for each index in old_order
    old_probs: flat probability list, lexicographic in old_order
    new_order: the same indices as old_order, in the desired ordering

  Returns:
    A flat list of the same probabilities, lexicographic in new_order.

  Example:
    With old_order=[a, b, c], old_domain_sizes=[2, 2, 2],
    old_probs=[.1, .2, .3, .4, .5, .6, .7, .8] and new_order=[c, a, b],
    the (unnormalized) entry for a=0, b=1, c=0 moves from position 2
    (a=0, b=1, c=0 -> .3) to position 1 (c=0, a=0, b=1 -> .3), giving
    [.1, .3, .5, .7, .2, .4, .6, .8].
  """
  assert len(old_order) == len(old_domain_sizes)
  assert set(old_order) == set(new_order)

  # The flat table must cover the full joint assignment space.
  num_assignments = 1
  for domain_size in old_domain_sizes:
    num_assignments *= domain_size
  assert len(old_probs) == num_assignments

  old_domains = [range(domain_size) for domain_size in old_domain_sizes]

  # Map each old-order assignment tuple to its probability.
  assignment_to_prob = {}
  for (values, prob) in zip(
      utils.lexicographic_combinations(old_domains), old_probs):
    assignment_to_prob[tuple(values)] = prob
  assert len(assignment_to_prob) == len(old_probs)

  # Look up each index's domain, then list domains in the new ordering.
  # (Equivalent to composing the old->new positional map with the
  # old-index->domain map.)
  index_to_domain = dict(zip(old_order, old_domains))
  new_domains = [index_to_domain[index] for index in new_order]

  # Walk new-order assignments, translating each back to its old-order
  # tuple to find the corresponding probability.
  new_probs = []
  for new_values in utils.lexicographic_combinations(new_domains):
    old_values = tuple(
      utils.reordered_list(new_order, old_order, tuple(new_values)))
    new_probs.append(assignment_to_prob[old_values])

  return new_probs
Example #8
0
        inverse_marginals = enumerator.marginals()
        print true_marginals - inverse_marginals
        assert true_marginals - inverse_marginals < .01

    print "Testing (sampling)..."
    test_sampler = mcmc.InverseChain(net,
                                     inverse_map,
                                     rng,
                                     evidence,
                                     proposal_size=proposal_size)
    test_sampler.initialize_state()
    inverse_marginals = test_sampler.marginals(num_samples)

    print true_marginals - inverse_marginals
    assert true_marginals - inverse_marginals < .02


@pytest.mark.slow
@pytest.mark.parametrize(
    ("proposal_size", "evidence_index"),
    utils.lexicographic_combinations([[1, 2, 3], [0, 1, 2, 3]]))
def test_sprinkler_net(proposal_size, evidence_index):
    """Run the inverse-sampling test on the sprinkler network."""
    random_state = utils.RandomState(seed=0)
    sprinkler = sprinkler_net.get(random_state)
    observed = sprinkler_net.evidence(evidence_index)
    run_test(random_state, sprinkler, observed, proposal_size=proposal_size)


# Allow running this test directly with one fixed parameter setting.
if __name__ == "__main__":
    test_sprinkler_net(proposal_size=2, evidence_index=2)