def test_shape_add_remove_sample(self):
    # test if add_remove_part traverses the sample space correctly
    # look at how much probability mass objects with a certain number of parts get
    maxn = 3
    s = ShapeTestHypothesis(parts=[CuboidPrimitive()], maxn=maxn)
    sample_count = 40000
    p = DeterministicMixtureProposal(moves={'add_remove': shape_add_remove_part},
                                     params={'MAX_PART_COUNT': maxn})
    sampler = MHSampler(initial_h=s, data=None, proposal=p, burn_in=5000,
                        sample_count=sample_count, best_sample_count=1,
                        thinning_period=1, report_period=4000)
    run = sampler.sample()
    # count how many times we sampled each part_count
    k = np.array([len(sample.parts) for sample in run.samples.samples])
    # since the prior p(h) = 1 / part_count and part_count is 1, 2, or 3, we expect 6/11 of all
    # samples to have a single part, 3/11 to have 2 parts, and 2/11 to have 3 parts.
    self.assertAlmostEqual(np.mean(k == 1), 6.0 / 11.0, places=1)
    self.assertAlmostEqual(np.mean(k == 2), 3.0 / 11.0, places=1)
    self.assertAlmostEqual(np.mean(k == 3), 2.0 / 11.0, places=1)
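
# A quick sanity check for the expected proportions asserted above (a standalone derivation,
# not part of the test): the unnormalized prior weight of a hypothesis with n parts is 1/n,
# so for n = 1, 2, 3 the normalizing constant is 1 + 1/2 + 1/3 = 11/6, giving probabilities
# 1 / (11/6) = 6/11, (1/2) / (11/6) = 3/11, and (1/3) / (11/6) = 2/11.
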
def test_bdaooss_add_remove_sample(self):
    # test if add_remove samples correctly
    # sample from the prior using add_remove and see if we get each tree with its expected
    # probability. here we constrain the tree so that the root's children cannot themselves
    # have children (MAX_DEPTH=2); so, there are 4 possible trees: P, P->P, P->[P, P],
    # and P->[P, P, P].
    # p(P) = 1/4, p(P->P) = 1/16, p(P->[P, P]) = 1/64, p(P->[P, P, P]) = 1/256
    # therefore, we expect to see P 64/85 of the time, P->P 16/85 of the time, P->[P, P] 4/85
    # of the time, and P->[P, P, P] 1/85 of the time.
    h = BDAoOSSShapeTestHypothesis(shape=self.h1.shape)
    p = DeterministicMixtureProposal(moves={'add_remove': bdaooss_add_remove_part},
                                     params={'MAX_DEPTH': 2})
    sampler = MHSampler(initial_h=h, proposal=p, data=None, burn_in=0, sample_count=50000,
                        best_sample_count=1, thinning_period=1, report_period=5000)
    run = sampler.sample()
    k = np.array([len(s.shape.spatial_model.spatial_states) for s in run.samples.samples])
    self.assertAlmostEqual(np.mean(k == 1), 64 / 85.0, places=1)
    self.assertAlmostEqual(np.mean(k == 2), 16 / 85.0, places=1)
    self.assertAlmostEqual(np.mean(k == 3), 4 / 85.0, places=1)
    self.assertAlmostEqual(np.mean(k == 4), 1 / 85.0, places=1)
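
# The expected frequencies above come from normalizing the prior: the unnormalized weights of
# the four trees are 1/4, 1/16, 1/64, 1/256. Multiplying through by 256 gives 64, 16, 4, 1,
# which sum to 85; hence the proportions 64/85, 16/85, 4/85, and 1/85 asserted above.
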
def test_voxel_sample_prior(self):
    # test if sampling from the prior produces a set of samples with expected frequency
    # statistics. for the prior defined in VoxelBasedShapeMaxD, we would expect to see an equal
    # number of samples for each number of partial voxels. Here we constrain depth to 2, so a
    # tree can have at least 0 and at most 9 partial voxels. Therefore, we would expect to see
    # each number of partial voxels 0.1 of the time.
    h = VoxelBasedShapeTestHypothesis(voxel=self.v1, max_depth=2)
    moves = {'flip_empty_vs_partial': voxel_based_shape_flip_empty_vs_partial,
             'flip_full_vs_partial': voxel_based_shape_flip_full_vs_partial,
             'flip_full_vs_empty': voxel_based_shape_flip_full_vs_empty}
    proposal = RandomMixtureProposal(moves=moves, params={'MAX_DEPTH': 2})
    sampler = MHSampler(initial_h=h, data=None, proposal=proposal, burn_in=0, thinning_period=1,
                        sample_count=50000, best_sample_count=1, report_period=5000)
    run = sampler.sample()
    partial_voxel_counts = np.array([s.voxel.count_voxels_by_status(PARTIAL_VOXEL)
                                     for s in run.samples.samples])
    for i in range(10):
        self.assertAlmostEqual(np.mean(partial_voxel_counts == i), 1.0 / 10.0, places=1)
def test_paperclip_add_remove_joint_sample(self):
    # test if add_remove_joint traverses the sample space correctly
    # look at how much probability mass objects with a certain number of joints get
    h = PaperClipTestHypothesis(joint_positions=[np.array((-0.4, 0.0, 0.0)),
                                                 np.array((0.4, 0.0, 0.0))],
                                mid_segment_id=0)
    h.min_joints = 2
    h.max_joints = 4
    sample_count = 40000
    p = DeterministicMixtureProposal(moves={'add_remove': paperclip_shape_add_remove_joint},
                                     params={'MAX_NEW_SEGMENT_LENGTH': 0.6})
    sampler = MHSampler(initial_h=h, data=None, proposal=p, burn_in=5000,
                        sample_count=sample_count, best_sample_count=1,
                        thinning_period=1, report_period=4000)
    run = sampler.sample()
    # count how many times we sampled each joint count
    k = np.array([sample.joint_count for sample in run.samples.samples])
    # since the prior is uniform and min_joints=2, max_joints=4,
    # we expect to see an equal number of samples with 2, 3, or 4 joints.
    self.assertAlmostEqual(np.mean(k == 2), 1.0 / 3.0, places=1)
    self.assertAlmostEqual(np.mean(k == 3), 1.0 / 3.0, places=1)
    self.assertAlmostEqual(np.mean(k == 4), 1.0 / 3.0, places=1)
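
# The expectation above is immediate: the prior over joint counts is uniform on
# {min_joints, ..., max_joints} = {2, 3, 4}, so each of the three joint counts should
# receive 1/3 of the probability mass.
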
def run_chain(name, sampler, initial_h, data, kernel, burn_in, thinning_period, sample_count,
              best_sample_count, report_period, results_folder, temperatures=None):
    """Run an MCMC chain and save results.

    This function is used by run_experiment scripts to run chains and save results.

    Parameters:
        name (str): Name of the chain. Used as the folder name to save sample images
        sampler (str): Sampler to use. 'mh' for Metropolis-Hastings, 'pt' for Parallel Tempering
        initial_h (I3DHypothesis): Initial hypothesis
        data (numpy.ndarray): Observed data
        kernel (mcmclib.Proposal): Transition kernel of the chain
        burn_in (int): Number of burn-in iterations
        thinning_period (int): Keep every ith sample
        sample_count (int): Number of samples to take
        best_sample_count (int): Size of the best samples list
        report_period (int): Report the status of the chain every report_period iterations
        results_folder (str): Folder to save the results
        temperatures (list): Temperatures of each chain for the Parallel Tempering sampler

    Returns:
        dict: results
    """
    if sampler == 'mh':
        from mcmclib.mh_sampler import MHSampler
        sampler = MHSampler(initial_h=initial_h, data=data, proposal=kernel, burn_in=burn_in,
                            sample_count=sample_count, best_sample_count=best_sample_count,
                            thinning_period=thinning_period, report_period=report_period)
    elif sampler == 'pt':
        if temperatures is None:
            raise ValueError('ParallelTempering sampler requires temperatures parameter.')
        chain_count = len(temperatures)
        from mcmclib.parallel_tempering_sampler import ParallelTemperingSampler
        sampler = ParallelTemperingSampler(initial_hs=[initial_h] * chain_count, data=data,
                                           proposals=[kernel] * chain_count,
                                           temperatures=temperatures, burn_in=burn_in,
                                           sample_count=sample_count,
                                           best_sample_count=best_sample_count,
                                           thinning_period=int(thinning_period / chain_count),
                                           report_period=int(report_period / chain_count))
    else:
        raise ValueError("Unknown sampler. Possible choices are 'mh' and 'pt'.")

    start = time.time()
    run = sampler.sample()
    end = time.time()

    # generate a random run id
    run_id = np.random.randint(1000000)
    run_file = "{0:s}/{1:s}_{2:s}_{3:06d}.pkl".format(
        results_folder, name, time.strftime("%Y%m%d_%H%M%S", time.localtime(start)), run_id)
    run.save(run_file)

    # save images of samples to disk
    fwm2 = vfm.VisionForwardModel(render_size=(300, 300))
    try:
        os.mkdir("{0:s}/{1:s}".format(results_folder, name))
    except OSError as e:
        warnings.warn(str(e))

    for i, sample in enumerate(run.samples.samples):
        fwm2.save_render("{0:s}/{1:s}/s{2:d}.png".format(results_folder, name, i), sample)
    for i, sample in enumerate(run.best_samples.samples):
        fwm2.save_render("{0:s}/{1:s}/b{2:d}.png".format(results_folder, name, i), sample)

    sample_lls = [sample.log_likelihood(data) for sample in run.samples.samples]
    best_lls = [sample.log_likelihood(data) for sample in run.best_samples.samples]
    mse_best = -2 * initial_h.params['LL_VARIANCE'] * np.max(best_lls)
    mse_mean = -2 * initial_h.params['LL_VARIANCE'] * np.mean(best_lls)
    mse_sample = -2 * initial_h.params['LL_VARIANCE'] * np.mean(sample_lls)

    # form the results dictionary
    results = {'run_id': run_id,
               'run_file': run_file,
               'mean_acceptance_rate': run.run_log.IsAccepted.mean(),
               'start_time': start,
               'end_time': end,
               'duration': (end - start) / 60.0,
               'best_posterior': np.max(run.best_samples.log_probs),
               'best_ll': np.max(best_lls),
               'mse': mse_best,
               'mean_best_posterior': np.mean(run.best_samples.log_probs),
               'mean_best_ll': np.mean(best_lls),
               'mse_mean': mse_mean,
               'mean_sample_posterior': np.mean(run.samples.log_probs),
               'mean_sample_ll': np.mean(sample_lls),
               'mse_sample': mse_sample}

    # add acceptance rate by move to results
    acc_rate_by_move = run.acceptance_rate_by_move()
    acc_rates = dict(zip(acc_rate_by_move.MoveType, acc_rate_by_move.AcceptanceRate))
    results.update(acc_rates)

    return results
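
# Example usage (a hedged sketch; `my_hypothesis`, `my_kernel`, and `observed` are placeholders
# for objects built elsewhere in an experiment script):
#
#     results = run_chain(name='chain1', sampler='mh', initial_h=my_hypothesis, data=observed,
#                         kernel=my_kernel, burn_in=1000, thinning_period=10, sample_count=500,
#                         best_sample_count=10, report_period=100, results_folder='./results')
#     print(results['best_ll'], results['mean_acceptance_rate'])
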