Ejemplo n.º 1
0
    def test_inverses_performance(self):
        """Report sampler error for every proposal size up to a maximum.

        The inverse nets are computed once with the largest proposal size,
        trained via ``self.train_inverses``, and the resulting training
        error is asserted to be below .01. Each proposal size from 1 through
        the maximum is then passed to ``self.check_inverses_by_time``
        together with a budget in seconds, so the amount of test sampling
        is decided by clock time rather than by a sample count.

        The per-size test error and acceptance counts are only printed;
        nothing is asserted about them.
        """
        training_sample_count = 50000
        seconds_per_size = 10
        use_precomputed_gibbs = True
        largest_size = 8

        print("Computing inverse nets...")
        inverse_map = invert.compute_inverse_map(
            self.net, self.evidence_nodes, self.rng, largest_size)

        print("Training on Gibbs samples...")
        training_error = self.train_inverses(
            inverse_map, training_sample_count, use_precomputed_gibbs)
        print("Error (training): {}".format(training_error))
        assert training_error < .01

        print("Testing (inverses)...")
        for size in range(1, largest_size + 1):
            outcome = self.check_inverses_by_time(
                inverse_map, size, seconds_per_size)
            test_error, num_proposals, num_accepted = outcome
            error_line = "Error (inverses, max inverse size {}): {}".format(
                size, test_error)
            print(error_line)
            accepted_line = "Accepted {} out of {} proposals\n".format(
                num_accepted, num_proposals)
            print(accepted_line)
Ejemplo n.º 2
0
def run_test(rng, net, evidence, proposal_size):
    """Train inverse nets on Gibbs samples and check them against exact marginals.

    Args:
        rng: random state handed to the inverter and to both samplers.
        net: the forward network.
        evidence: evidence object; only its first key is used to pick the
            evidence node for inversion, while the whole object is passed
            to the samplers and enumerators.
        proposal_size: used both as ``max_inverse_size`` when inverting and
            as ``proposal_size`` for the inverse sampler.

    Two checks are made, each printing the difference before asserting on
    it: every trained inverse net is enumerated exactly (difference from
    the true marginals must be < .01), and an ``InverseChain`` is sampled
    (difference must be < .02).
    """
    sample_count = 100000

    first_key = evidence.keys()[0]
    evidence_nodes = [net.nodes_by_index[first_key]]

    print("Computing trainable inverse nets...")
    inverse_map = invert.compute_inverse_map(
        net, evidence_nodes, rng, max_inverse_size=proposal_size)

    print("Initializing trainer...")
    trainer = train.Trainer(net, inverse_map, precompute_gibbs=False)

    print("Training...")
    gibbs_chain = mcmc.GibbsChain(net, rng, evidence)
    gibbs_chain.initialize_state()
    for _ in xrange(sample_count):
        gibbs_chain.transition()
        trainer.observe(gibbs_chain.state)

    print("Finishing training...")
    trainer.finalize()  # Does not include deterministic Gibbs yet!

    print("Computing exact solution...")
    true_marginals = exact_inference.Enumerator(net, evidence).marginals()

    # The following works even when we don't compute full inverse
    # networks, since even incomplete networks still contain all nodes,
    # which allows us to learn marginals.
    print("Testing (exact inference)...")
    for inverse_net in inverse_map.values():
        inverse_enumerator = exact_inference.Enumerator(inverse_net, evidence)
        exact_gap = true_marginals - inverse_enumerator.marginals()
        print(exact_gap)
        assert exact_gap < .01

    print("Testing (sampling)...")
    inverse_chain = mcmc.InverseChain(
        net, inverse_map, rng, evidence, proposal_size=proposal_size)
    inverse_chain.initialize_state()
    sampled_gap = true_marginals - inverse_chain.marginals(sample_count)

    print(sampled_gap)
    assert sampled_gap < .02
Ejemplo n.º 3
0
 def test_broad(self, evidence_index):
     """Check inverse-net structure for one sprinkler-net evidence setting.

     The evidence selected by ``evidence_index`` is turned into evidence
     nodes of ``self.net`` and the inverse map is computed. The test then
     asserts that the map has exactly two entries and, for each entry,
     that the keyed node is not in the evidence, that its counterpart in
     the inverse net has two parents, and that every evidence node's
     counterpart has no parents.
     """
     evidence = sprinkler_net.evidence(evidence_index)
     evidence_nodes = []
     for key in evidence.keys():
         evidence_nodes.append(self.net.nodes_by_index[key])

     inverse_map = invert.compute_inverse_map(
         self.net, evidence_nodes, self.rng)
     assert len(inverse_map) == 2

     for entry in inverse_map.items():
         target, inverse_net = entry
         assert target.index not in evidence
         inverse_target = inverse_net.nodes_by_index[target.index]
         assert len(inverse_target.parents) == 2
         for observed in evidence_nodes:
             inverse_observed = inverse_net.nodes_by_index[observed.index]
             assert len(inverse_observed.parents) == 0
Ejemplo n.º 4
0
 def profile_inverses(self):
     """Print how long inversion, training and test sampling each take.

     Runs the three phases back to back, taking a timestamp after each
     one, and prints the elapsed ``timedelta`` per phase. Nothing is
     returned or asserted.
     """
     training_sample_count = 5000
     sampling_seconds = 10
     use_precomputed_gibbs = False
     proposal_limit = 3

     inversion_start = datetime.datetime.now()
     inverse_map = invert.compute_inverse_map(
         self.net, self.evidence_nodes, self.rng, proposal_limit)
     inversion_end = datetime.datetime.now()
     inversion_elapsed = inversion_end - inversion_start
     print("Time to compute inverse map: {}".format(inversion_elapsed))

     self.train_inverses(
         inverse_map, training_sample_count, use_precomputed_gibbs)
     training_end = datetime.datetime.now()
     print("Time to train inverses: {}".format(training_end - inversion_end))

     self.check_inverses_by_time(
         inverse_map, proposal_limit, sampling_seconds)
     sampling_end = datetime.datetime.now()
     print("Time for test sampling: {}".format(sampling_end - training_end))
Ejemplo n.º 5
0
 def test_max_inverse_size(self, max_inverse_size):
     """Check that ``max_inverse_size`` caps how many inverse nodes get parents.

     Using evidence setting 0 of the triangle net, the inverse map must
     hold one entry per non-evidence node. For each entry, after the
     inverse net is compiled, the keyed node's counterpart must come last
     in ``nodes_by_topology`` and have parents, and the number of nodes
     with parents must equal the smaller of ``max_inverse_size`` and the
     non-evidence node count.
     """
     evidence = triangle_net.evidence(0)
     evidence_nodes = []
     for key in evidence.keys():
         evidence_nodes.append(self.net.nodes_by_index[key])

     inverse_map = invert.compute_inverse_map(
         self.net, evidence_nodes, self.rng,
         max_inverse_size=max_inverse_size)
     assert len(inverse_map) == self.net.node_count - len(evidence_nodes)

     for forward_target, inverse_net in inverse_map.items():
         inverse_net.compile()
         inverse_target = inverse_net.nodes_by_index[forward_target.index]
         assert inverse_net.nodes_by_topology[-1] == inverse_target
         assert inverse_target.parents

         parented_count = sum(
             1 for node in inverse_net.nodes_by_index if node.parents)
         latent_count = self.net.node_count - len(evidence_nodes)
         assert parented_count == min(max_inverse_size, latent_count)
Ejemplo n.º 6
0
 def test_detailed(self):
     """Check the exact structure of sprinkler-net inverses given "Grass".

     With "Grass" as the only evidence node, the inverse map must have
     two entries. In each inverse net: "Grass" has no parents, the node
     the entry is keyed by has two parents, the supports of "Rain",
     "Sprinkler" and "Grass" match the forward net, and every remaining
     node has "Grass" as its sole parent.
     """
     evidence_nodes = [self.net.find_node("Grass")]
     forward_rain = self.net.find_node("Rain")
     forward_sprinkler = self.net.find_node("Sprinkler")
     forward_grass = self.net.find_node("Grass")

     inverse_map = invert.compute_inverse_map(
         self.net, evidence_nodes, self.rng)
     assert len(inverse_map) == 2

     for target, inverse_net in inverse_map.items():
         inverse_rain = inverse_net.find_node("Rain")
         inverse_sprinkler = inverse_net.find_node("Sprinkler")
         inverse_grass = inverse_net.find_node("Grass")
         inverse_target = inverse_net.nodes_by_index[target.index]

         assert len(inverse_grass.parents) == 0
         assert len(inverse_target.parents) == 2
         assert inverse_rain.support == forward_rain.support
         assert inverse_sprinkler.support == forward_sprinkler.support
         assert inverse_grass.support == forward_grass.support

         for node in inverse_net.nodes_by_index:
             # Keep the original `!=` comparisons rather than negating `==`.
             is_intermediate = node != inverse_grass and node != inverse_target
             if not is_intermediate:
                 continue
             assert node.parents == [inverse_grass]
Ejemplo n.º 7
0
    def test_inverse_inferences(self):
        """Print the inverse sampler's estimation error per training-set size.

        For each training-set size, and for each trial, a fresh trainer is
        fed that many worlds from ``self.net.sample()`` and finalized once.
        An ``InverseChain`` conditioned on ``self.observation`` = 5.5 is then
        run for ``num_samples`` transitions, and the mean of element 0 of
        the visited states is compared with ``trueval``. One error is
        recorded per trial, and the RMSE over trials is printed once per
        training-set size.

        The test makes no assertions; its output is diagnostic only.

        Fixes relative to the previous version:
          * ``trainer.finalize()`` ran after every observed sample; it now
            runs once after all observations, as at the other call sites
            of ``train.Trainer``.
          * The mean, error bookkeeping and prints ran inside the
            per-transition loop, recomputing the mean over all states at
            every step and recording one "error" per transition instead of
            one per trial. They now run at the loop level their messages
            describe.
        """
        inverse_map = invert.compute_inverse_map(self.net, [self.observation],
                                                 self.rng)
        print(inverse_map.nets_by_key)
        # Presumably the reference value of the estimated quantity given the
        # observation below -- TODO confirm where this number comes from.
        trueval = 1.09902618098
        proposal_size = 4
        num_samples = 1000
        # Other sweeps that have been used here:
        #   [50, 100, 200, 500, 1000, 2000, 5000]
        #   [10, 10000, 100000]
        #   [20, 20000, 50000]
        for trainsamps in [10, 100, 1000, 10000, 100000]:
            errors = []
            k = math.sqrt(trainsamps)
            # Raise the trial count (e.g. to 50) for a meaningful RMSE.
            for trial in range(1):
                trainer = train.Trainer(self.net, inverse_map, False, k=k)
                for _ in xrange(trainsamps):
                    world = self.net.sample()
                    trainer.observe(world)
                # Finalize once, after every training sample has been seen.
                trainer.finalize()

                evidence = evid.Evidence(keys=[self.observation], values=[5.5])
                test_sampler = mcmc.InverseChain(self.net, inverse_map,
                                                 self.rng, evidence,
                                                 proposal_size)
                test_sampler.initialize_state()
                first_values = []
                for _ in range(num_samples):
                    test_sampler.transition()
                    # Read element 0 right away so the estimate does not
                    # depend on whether the chain reuses its state object.
                    first_values.append(test_sampler.state[0])

                m = np.mean(first_values)
                print('trainsamps %s trial %s mean %s' % (trainsamps,
                                                          trial, m))
                errors.append(m - trueval)

            rmse = math.sqrt(np.mean([e**2 for e in errors]))
            print('trainsamps %s rmse %s' % (trainsamps, rmse))
Ejemplo n.º 8
0
def run(job, session):
    """Run one experiment job end to end and record its results on ``job``.

    Stages, in order:
      1. Build the triangle net, evidence and reference marginals from
         ``job.seed`` and ``job.determinism``; status becomes "started".
      2. Compute the inverse map (timed into ``job.inversion_seconds``);
         status becomes "inverted".
      3. Train with the learner named by ``job.learner`` on Gibbs samples,
         prior samples, or both, according to ``job.training_source``
         (timed into ``job.training_seconds``); status becomes "trained".
      4. Run an ``InverseChain`` until ``job.test_seconds`` of wall-clock
         time have elapsed, observing the marginal error every 100
         transitions, then store the test metrics on ``job``.

    ``session.commit()`` is called after each of the three status changes.
    The test metrics assigned in stage 4 are not committed here.

    Args:
        job: object carrying the experiment configuration and receiving
            the results as attributes.
        session: object whose ``commit()`` is called after each status
            change.

    Raises:
        ValueError: if ``job.learner`` is neither "counts" nor "lr".
    """
    now = datetime.datetime.now

    print("Starting job...")
    job.start_time = now()
    rng = utils.RandomState(job.seed)
    net = triangle_net.get(rng, job.determinism)
    evidence = triangle_net.evidence(0, job.determinism)
    evidence_nodes = [net.nodes_by_index[key] for key in evidence.keys()]
    latent_count = len(net.nodes()) - len(evidence_nodes)
    marginals = triangle_net.marginals(0, job.determinism)
    job.status = "started"
    session.commit()

    print("Computing inverse map...")
    inversion_start = now()
    inverse_map = invert.compute_inverse_map(
        net, evidence_nodes, rng, job.max_inverse_size)
    inversion_end = now()
    job.inversion_seconds = (inversion_end - inversion_start).total_seconds()
    job.status = "inverted"
    session.commit()

    print("Training inverses...")
    if job.learner not in ("counts", "lr"):
        raise ValueError("Unknown learner type!")
    if job.learner == "counts":
        learner_class = learn.CountLearner
    else:
        learner_class = learn.LogisticRegressionLearner
    trainer = train.Trainer(
        net, inverse_map, job.precompute_gibbs, learner_class)
    counter = marg.MarginalCounter(net)

    use_gibbs = job.training_source in ("gibbs", "prior+gibbs")
    if use_gibbs:
        gibbs_chain = mcmc.GibbsChain(net, rng, evidence)
        gibbs_chain.initialize_state()
        for _ in xrange(job.num_training_samples):
            gibbs_chain.transition()
            trainer.observe(gibbs_chain.state)
            counter.observe(gibbs_chain.state)

    use_prior = job.training_source in ("prior", "prior+gibbs")
    if use_prior:
        for _ in xrange(job.num_training_samples):
            prior_world = net.sample()
            trainer.observe(prior_world)
            counter.observe(prior_world)

    trainer.finalize()
    job.training_error = (marginals - counter.marginals()).mean()
    training_end = now()
    job.training_seconds = (training_end - inversion_end).total_seconds()
    job.status = "trained"
    session.commit()

    print("Testing inverse sampler...")
    inverse_chain = mcmc.InverseChain(
        net, inverse_map, rng, evidence, job.max_inverse_size)
    inverse_chain.initialize_state()
    counter = marg.MarginalCounter(net)  # fresh counter for the test phase
    accepted_total = 0
    test_start = now()
    transitions = 0
    error_integrator = utils.TemporalIntegrator()
    while True:
        elapsed = (now() - test_start).total_seconds()
        if not (elapsed < job.test_seconds):
            break
        accepted = inverse_chain.transition()
        counter.observe(inverse_chain.state)
        accepted_total += accepted
        transitions += 1
        if transitions % 100 == 0:
            running_error = (marginals - counter.marginals()).mean()
            error_integrator.observe(running_error)

    final_error = (marginals - counter.marginals()).mean()
    test_end = now()
    measured_seconds = (test_end - test_start).total_seconds()
    error_integrator.observe(final_error)

    job.test_error = final_error
    job.integrated_error = error_integrator.integral / measured_seconds
    # Proposal count is the transition count times the non-evidence node count.
    job.test_proposals = transitions * latent_count
    job.test_proposals_accepted = accepted_total
    job.empirical_test_seconds = measured_seconds
Ejemplo n.º 9
0
def test_logistic_regression_mcmc(learner_class_index=0, seed=0):
    """Train inverses for a fixed time, then sample for a fixed time.

    The triangle net (evidence setting 0) is inverted with a maximum
    inverse size of 30. A trainer using the selected learner is fed worlds
    from ``net.sample`` for two minutes of wall-clock time, measured from
    just before the trainer is constructed. An ``InverseChain`` is then run
    for one minute, observing the marginal error every 100 transitions.

    Args:
        learner_class_index: index into the list of learner factories:
            0 is logistic regression with the identity transformer, 1 is
            logistic regression with the square transformer, 2 is
            ``learn.CountLearner``.
        seed: seed for ``utils.RandomState``.

    Returns:
        A 4-tuple ``(num_proposals, num_proposals_accepted,
        integrated_error, final_error)``. ``num_proposals`` is the
        transition count times the number of non-evidence nodes, and
        ``integrated_error`` is the integrator's integral divided by the
        measured test duration in seconds.
    """
    max_inverse_size = 30
    train_seconds = 2 * 60
    test_seconds = 60
    now = datetime.datetime.now

    rng = utils.RandomState(seed=seed)
    net = triangle_net.get(rng)
    evidence = triangle_net.evidence(0)
    marginals = triangle_net.marginals(0)
    evidence_nodes = [net.nodes_by_index[key] for key in evidence.keys()]

    def identity_logistic(support, rng):
        return learn.LogisticRegressionLearner(
            support, rng, transform_inputs=learn.identity_transformer)

    def square_logistic(support, rng):
        return learn.LogisticRegressionLearner(
            support, rng, transform_inputs=learn.square_transformer)

    learner_classes = [identity_logistic, square_logistic, learn.CountLearner]
    learner_class = learner_classes[learner_class_index]
    latent_count = len(net.nodes()) - len(evidence_nodes)

    print("Inverting network...")
    inverse_map = invert.compute_inverse_map(
        net, evidence_nodes, rng, max_inverse_size)

    train_start = now()
    print("Initializing trainer...")
    trainer = train.Trainer(
        net, inverse_map, False, learner_class=learner_class)
    print("Training...")
    world = random_world.RandomWorld()
    while True:
        if not ((now() - train_start).total_seconds() < train_seconds):
            break
        world = net.sample(world)  # Prior!
        trainer.observe(world)
        world.data = {}
    trainer.finalize()

    print("Testing...")
    inverse_chain = mcmc.InverseChain(
        net, inverse_map, rng, evidence, proposal_size=max_inverse_size)
    inverse_chain.initialize_state()
    error_integrator = utils.TemporalIntegrator()
    test_start = now()
    counter = marg.MarginalCounter(net)
    transitions = 0
    accepted_total = 0
    while True:
        if not ((now() - test_start).total_seconds() < test_seconds):
            break
        accepted = inverse_chain.transition()
        accepted_total += accepted
        counter.observe(inverse_chain.state)
        transitions += 1
        if transitions % 100 == 0:
            running_error = (marginals - counter.marginals()).mean()
            error_integrator.observe(running_error)

    test_end = now()
    measured_seconds = (test_end - test_start).total_seconds()
    final_error = (marginals - counter.marginals()).mean()
    error_integrator.observe(final_error)
    num_proposals = transitions * latent_count
    integrated_error = error_integrator.integral / measured_seconds
    return (num_proposals, accepted_total, integrated_error, final_error)