def test_inverses_performance(self):
    """Verify that bigger proposal sizes can result in better performance.

    This test uses clock time, not number of samples, to decide how many
    test samples to take.
    """
    num_training_samples = 50000
    test_seconds = 10          # wall-clock budget per proposal size
    precompute_gibbs = True
    max_inverse_size = 8
    print "Computing inverse nets..."
    inverse_map = invert.compute_inverse_map(self.net, self.evidence_nodes,
                                             self.rng, max_inverse_size)
    print "Training on Gibbs samples..."
    training_error = self.train_inverses(inverse_map, num_training_samples,
                                         precompute_gibbs)
    print "Error (training): {}".format(training_error)
    assert training_error < .01
    print "Testing (inverses)..."
    # Sweep every proposal size up to the maximum; each run gets the same
    # wall-clock budget so results are comparable across sizes.
    for inverse_size in range(1, max_inverse_size + 1):
        test_error, num_proposals, num_accepted = self.check_inverses_by_time(
            inverse_map, inverse_size, test_seconds)
        print "Error (inverses, max inverse size {}): {}".format(
            inverse_size, test_error)
        print "Accepted {} out of {} proposals\n".format(
            num_accepted, num_proposals)
def run_test(rng, net, evidence, proposal_size): num_samples = 100000 evidence_nodes = [net.nodes_by_index[evidence.keys()[0]]] print "Computing trainable inverse nets..." inverse_map = invert.compute_inverse_map(net, evidence_nodes, rng, max_inverse_size=proposal_size) print "Initializing trainer..." trainer = train.Trainer(net, inverse_map, precompute_gibbs=False) print "Training..." training_sampler = mcmc.GibbsChain(net, rng, evidence) training_sampler.initialize_state() for _ in xrange(num_samples): training_sampler.transition() trainer.observe(training_sampler.state) print "Finishing training..." trainer.finalize() # Does not include deterministic Gibbs yet! print "Computing exact solution..." enumerator = exact_inference.Enumerator(net, evidence) true_marginals = enumerator.marginals() # The following works even when we don't compute full inverse # networks, since even incomplete networks still contain all nodes, # which allows us to learn marginals. print "Testing (exact inference)..." for inverse_net in inverse_map.values(): enumerator = exact_inference.Enumerator(inverse_net, evidence) inverse_marginals = enumerator.marginals() print true_marginals - inverse_marginals assert true_marginals - inverse_marginals < .01 print "Testing (sampling)..." test_sampler = mcmc.InverseChain(net, inverse_map, rng, evidence, proposal_size=proposal_size) test_sampler.initialize_state() inverse_marginals = test_sampler.marginals(num_samples) print true_marginals - inverse_marginals assert true_marginals - inverse_marginals < .02
def test_broad(self, evidence_index):
    """Test inversion across different evidence settings."""
    evidence = sprinkler_net.evidence(evidence_index)
    observed_nodes = [
        self.net.nodes_by_index[index] for index in evidence.keys()
    ]
    inverse_map = invert.compute_inverse_map(self.net, observed_nodes,
                                             self.rng)
    # Expect exactly two inverse networks for this evidence setting.
    assert len(inverse_map) == 2
    for final_node, inverse_net in inverse_map.items():
        # The inverse target must itself be unobserved.
        assert final_node.index not in evidence
        target = inverse_net.nodes_by_index[final_node.index]
        assert len(target.parents) == 2
        # Evidence nodes become parentless roots of each inverse net.
        for observed in observed_nodes:
            assert not inverse_net.nodes_by_index[observed.index].parents
def profile_inverses(self): num_training_samples = 5000 test_seconds = 10 precompute_gibbs = False max_inverse_size = 3 start_time = datetime.datetime.now() inverse_map = invert.compute_inverse_map(self.net, self.evidence_nodes, self.rng, max_inverse_size) t1 = datetime.datetime.now() print "Time to compute inverse map: {}".format(t1 - start_time) self.train_inverses(inverse_map, num_training_samples, precompute_gibbs) t2 = datetime.datetime.now() print "Time to train inverses: {}".format(t2 - t1) self.check_inverses_by_time(inverse_map, max_inverse_size, test_seconds) t3 = datetime.datetime.now() print "Time for test sampling: {}".format(t3 - t2)
def test_max_inverse_size(self, max_inverse_size):
    """Check that inverse networks respect the max_inverse_size cap."""
    evidence = triangle_net.evidence(0)
    evidence_nodes = [
        self.net.nodes_by_index[index] for index in evidence.keys()
    ]
    inverse_map = invert.compute_inverse_map(
        self.net, evidence_nodes, self.rng,
        max_inverse_size=max_inverse_size)
    num_latent = self.net.node_count - len(evidence_nodes)
    # One inverse network per latent (non-evidence) node.
    assert len(inverse_map) == num_latent
    for forward_final, inverse_net in inverse_map.items():
        inverse_net.compile()
        final = inverse_net.nodes_by_index[forward_final.index]
        # The inverse's target node is sampled last and is never a root.
        assert inverse_net.nodes_by_topology[-1] == final
        assert final.parents
        # Only up to max_inverse_size nodes may have been wired up with
        # parents; the rest stay as roots.
        with_parents = sum(
            1 for node in inverse_net.nodes_by_index if node.parents)
        assert with_parents == min(max_inverse_size, num_latent)
def test_detailed(self):
    """Test that inverses for sprinkler network have the correct structure."""
    evidence_nodes = [self.net.find_node("Grass")]
    fwd_rain = self.net.find_node("Rain")
    fwd_sprinkler = self.net.find_node("Sprinkler")
    fwd_grass = self.net.find_node("Grass")
    inverse_map = invert.compute_inverse_map(self.net, evidence_nodes,
                                             self.rng)
    assert len(inverse_map) == 2
    for final_node, inverse_net in inverse_map.items():
        inv_rain = inverse_net.find_node("Rain")
        inv_sprinkler = inverse_net.find_node("Sprinkler")
        inv_grass = inverse_net.find_node("Grass")
        inv_final = inverse_net.nodes_by_index[final_node.index]
        # The evidence node ("Grass") is a root; the final node has two
        # parents.
        assert len(inv_grass.parents) == 0
        assert len(inv_final.parents) == 2
        # Supports carry over unchanged from the forward network.
        assert inv_rain.support == fwd_rain.support
        assert inv_sprinkler.support == fwd_sprinkler.support
        assert inv_grass.support == fwd_grass.support
        # Every remaining node depends only on the evidence node.
        for node in inverse_net.nodes_by_index:
            if node not in (inv_grass, inv_final):
                assert node.parents == [inv_grass]
def test_inverse_inferences(self):
    """Measure estimator error of the inverse sampler vs. training size.

    For several training-set sizes, trains inverses on prior samples and
    prints the RMSE of the sample-mean estimate against a fixed reference
    value.
    """
    inverse_map = invert.compute_inverse_map(self.net, [self.observation],
                                             self.rng)
    print inverse_map.nets_by_key
    # Reference value the estimates are compared against — presumably the
    # true posterior mean of node 0 given the evidence below; TODO confirm.
    trueval = 1.09902618098
    # Alternative sweeps used in earlier experiment runs:
    # for trainsamps in [50, 100, 200, 500, 1000, 2000, 5000]:
    # for trainsamps in [10, 10000, 100000]:
    # for trainsamps in [20, 20000, 50000]:
    for trainsamps in [10, 100, 1000, 10000, 100000]:
        errors = []
        # for trial in range(50):
        for trial in range(1):
            # k passed to Trainer scales with sqrt(#samples) — presumably a
            # smoothing/regularization strength; verify against Trainer.
            k = math.sqrt(trainsamps)
            trainer = train.Trainer(self.net, inverse_map, False, k=k)
            # Train purely on prior samples from the forward network.
            for _ in xrange(trainsamps):
                world = self.net.sample()
                trainer.observe(world)
            trainer.finalize()
            evidence = evid.Evidence(keys=[self.observation], values=[5.5])
            proposal_size = 4
            num_samples = 1000
            test_sampler = mcmc.InverseChain(self.net, inverse_map, self.rng,
                                             evidence, proposal_size)
            test_sampler.initialize_state()
            states = []
            for i in range(num_samples):
                test_sampler.transition()
                states.append(test_sampler.state)
            # Estimate is the mean of node 0's value across sampled states.
            m = np.mean([s[0] for s in states])
            print 'trainsamps %s trial %s mean %s' % (trainsamps, trial, m)
            errors.append(m - trueval)
        rmse = math.sqrt(np.mean([e**2 for e in errors]))
        print 'trainsamps %s rmse %s' % (trainsamps, rmse)
def run(job, session):
    """Run one experiment job end-to-end, recording progress on ``job``.

    Stages: build the network and evidence, compute the inverse map, train
    the inverses, then test the inverse MCMC sampler for a wall-clock
    budget. After each stage the job's status/timing fields are updated and
    committed via the (presumably SQLAlchemy-like) ``session``.
    """
    print "Starting job..."
    job.start_time = datetime.datetime.now()
    rng = utils.RandomState(job.seed)
    net = triangle_net.get(rng, job.determinism)
    evidence = triangle_net.evidence(0, job.determinism)
    evidence_nodes = [net.nodes_by_index[index] for index in evidence.keys()]
    num_latent_nodes = len(net.nodes()) - len(evidence_nodes)
    marginals = triangle_net.marginals(0, job.determinism)
    job.status = "started"
    session.commit()

    print "Computing inverse map..."
    t0 = datetime.datetime.now()
    inverse_map = invert.compute_inverse_map(net, evidence_nodes, rng,
                                             job.max_inverse_size)
    t1 = datetime.datetime.now()
    job.inversion_seconds = (t1 - t0).total_seconds()
    job.status = "inverted"
    session.commit()

    print "Training inverses..."
    if job.learner == "counts":
        learner_class = learn.CountLearner
    elif job.learner == "lr":
        learner_class = learn.LogisticRegressionLearner
    else:
        raise ValueError("Unknown learner type!")
    trainer = train.Trainer(net, inverse_map, job.precompute_gibbs,
                            learner_class)
    # Track training-sample marginals so we can report training error.
    counter = marg.MarginalCounter(net)
    if job.training_source in ("gibbs", "prior+gibbs"):
        training_sampler = mcmc.GibbsChain(net, rng, evidence)
        training_sampler.initialize_state()
        for _ in xrange(job.num_training_samples):
            training_sampler.transition()
            trainer.observe(training_sampler.state)
            counter.observe(training_sampler.state)
    if job.training_source in ("prior", "prior+gibbs"):
        for _ in xrange(job.num_training_samples):
            world = net.sample()
            trainer.observe(world)
            counter.observe(world)
    trainer.finalize()
    job.training_error = (marginals - counter.marginals()).mean()
    t2 = datetime.datetime.now()
    job.training_seconds = (t2 - t1).total_seconds()
    job.status = "trained"
    session.commit()

    print "Testing inverse sampler..."
    test_sampler = mcmc.InverseChain(net, inverse_map, rng, evidence,
                                     job.max_inverse_size)
    test_sampler.initialize_state()
    counter = marg.MarginalCounter(net)
    num_proposals_accepted = 0
    test_start_time = datetime.datetime.now()
    i = 0
    error_integrator = utils.TemporalIntegrator()
    # Run transitions until the wall-clock test budget is exhausted.
    while ((datetime.datetime.now() - test_start_time).total_seconds() <
           job.test_seconds):
        # transition() apparently returns the number of accepted proposals
        # for this sweep — TODO confirm against mcmc.InverseChain.
        accept = test_sampler.transition()
        counter.observe(test_sampler.state)
        num_proposals_accepted += accept
        i += 1
        # Periodically record error for the time-integrated error metric.
        if i % 100 == 0:
            error = (marginals - counter.marginals()).mean()
            error_integrator.observe(error)
    final_error = (marginals - counter.marginals()).mean()
    final_time = datetime.datetime.now()
    empirical_test_seconds = (final_time - test_start_time).total_seconds()
    error_integrator.observe(final_error)
    job.test_error = final_error
    # Normalize the integrated error by the actual (not requested) duration.
    job.integrated_error = error_integrator.integral / empirical_test_seconds
    job.test_proposals = i * num_latent_nodes
    job.test_proposals_accepted = num_proposals_accepted
    job.empirical_test_seconds = empirical_test_seconds
def test_logistic_regression_mcmc(learner_class_index=0, seed=0):
    """Train and test an inverse MCMC sampler with a selectable learner.

    ``learner_class_index`` selects: 0 = logistic regression (identity
    features), 1 = logistic regression (squared features), 2 = count-based
    learner. Training runs on prior samples for a fixed wall-clock budget,
    then the inverse chain is tested against the known marginals.

    Returns a tuple: (num_proposals, num_proposals_accepted,
    time-averaged integrated error, final error).
    """
    max_inverse_size = 30
    train_seconds = 2 * 60
    test_seconds = 60
    rng = utils.RandomState(seed=seed)
    net = triangle_net.get(rng)
    evidence = triangle_net.evidence(0)
    marginals = triangle_net.marginals(0)
    evidence_nodes = [net.nodes_by_index[index] for index in evidence.keys()]
    learner_classes = [
        lambda support, rng: learn.LogisticRegressionLearner(
            support, rng, transform_inputs=learn.identity_transformer),
        lambda support, rng: learn.LogisticRegressionLearner(
            support, rng, transform_inputs=learn.square_transformer),
        learn.CountLearner
    ]
    learner_class = learner_classes[learner_class_index]
    num_latent_nodes = len(net.nodes()) - len(evidence_nodes)
    print "Inverting network..."
    inverse_map = invert.compute_inverse_map(net, evidence_nodes, rng,
                                             max_inverse_size)
    train_start_time = datetime.datetime.now()
    print "Initializing trainer..."
    trainer = train.Trainer(net, inverse_map, False,
                            learner_class=learner_class)
    print "Training..."
    sample = random_world.RandomWorld()
    # Generate prior samples until the training time budget runs out;
    # clearing sample.data resets the world for the next draw.
    while ((datetime.datetime.now() - train_start_time).total_seconds() <
           train_seconds):
        sample = net.sample(sample)  # Prior!
        trainer.observe(sample)
        sample.data = {}
    trainer.finalize()
    print "Testing..."
    test_sampler = mcmc.InverseChain(net, inverse_map, rng, evidence,
                                     proposal_size=max_inverse_size)
    test_sampler.initialize_state()
    error_integrator = utils.TemporalIntegrator()
    test_start_time = datetime.datetime.now()
    counter = marg.MarginalCounter(net)
    i = 0
    num_proposals_accepted = 0
    # Run transitions until the wall-clock test budget is exhausted.
    while ((datetime.datetime.now() - test_start_time).total_seconds() <
           test_seconds):
        accept = test_sampler.transition()
        num_proposals_accepted += accept
        counter.observe(test_sampler.state)
        i += 1
        # Periodically record error for the time-integrated error metric.
        if i % 100 == 0:
            error = (marginals - counter.marginals()).mean()
            error_integrator.observe(error)
    final_time = datetime.datetime.now()
    empirical_test_seconds = (final_time - test_start_time).total_seconds()
    final_error = (marginals - counter.marginals()).mean()
    error_integrator.observe(final_error)
    num_proposals = i * num_latent_nodes
    return (num_proposals, num_proposals_accepted,
            error_integrator.integral / empirical_test_seconds,
            final_error)