def test_empty_prior():
    """Check that priors are zero when none are defined."""
    # Build a problem from the negative log posterior only,
    # deliberately without attaching any prior object.
    posterior_fun = pypesto.Objective(fun=negative_log_posterior)
    test_problem = pypesto.Problem(
        objective=posterior_fun,
        lb=-10,
        ub=10,
        x_names=['x'],
    )

    result = sample.sample(
        test_problem,
        n_samples=50,
        sampler=sample.AdaptiveMetropolisSampler(),
        x0=np.array([0.0]),
        filename=None,
    )

    # since no prior was supplied, the recorded negative log prior
    # of the first chain must be identically zero
    logprior_trace = -result.sample_result.trace_neglogprior[0, :]
    assert (logprior_trace == 0.0).all()
def test_autocorrelation_pipeline():
    """Check that the autocorrelation test works."""
    problem = gaussian_problem()
    sampler = sample.MetropolisSampler()

    # optimization; filename=None avoids writing a result file
    # (consistent with the other tests in this module)
    result = optimize.minimize(problem, n_starts=3, filename=None)

    # sample
    result = sample.sample(
        problem, sampler=sampler, n_samples=1000, result=result,
        filename=None,
    )

    # run auto-correlation with previous geweke
    sample.geweke_test(result)
    ac1 = sample.auto_correlation(result)

    # run auto-correlation without previous geweke
    result.sample_result.burn_in = None
    ac2 = sample.auto_correlation(result)

    # burn-in must be recomputed internally, giving the same result
    assert ac1 == ac2

    # run effective sample size with previous geweke and autocorrelation
    ess1 = sample.effective_sample_size(result)

    # run effective sample size without previous geweke and autocorrelation
    result.sample_result.burn_in = None
    result.sample_result.auto_correlation = None
    ess2 = sample.effective_sample_size(result)

    assert ess1 == ess2
def test_autocorrelation_short_chain():
    """Check that the autocorrelation reacts nicely to small sample numbers."""
    problem = gaussian_problem()

    # optimize, then draw a deliberately tiny chain
    result = optimize.minimize(problem, n_starts=3, filename=None)
    result = sample.sample(
        problem,
        sampler=sample.MetropolisSampler(),
        n_samples=10,
        result=result,
        filename=None,
    )

    # manually set burn in to chain length (only for testing!!)
    n_points = result.sample_result.trace_x.shape[1]
    result.sample_result.burn_in = n_points

    # with the whole chain burned in, both diagnostics must bail out
    assert sample.auto_correlation(result) is None
    assert sample.effective_sample_size(result) is None
def test_ground_truth():
    """Test whether we actually retrieve correct distributions."""
    # use best self-implemented sampler, which has a chance of correctly
    # sample from the distribution
    sampler = sample.AdaptiveParallelTemperingSampler(
        internal_sampler=sample.AdaptiveMetropolisSampler(),
        n_chains=5,
    )

    problem = gaussian_problem()
    result = optimize.minimize(problem, filename=None)
    result = sample.sample(
        problem,
        n_samples=5000,
        result=result,
        sampler=sampler,
        filename=None,
    )

    # flatten the first chain into a 1d sample
    chain = result.sample_result.trace_x[0].flatten()

    # the samples should look like a standard normal ...
    statistic, pval = kstest(chain, 'norm')
    print(statistic, pval)
    assert statistic < 0.1

    # ... and should not look uniform
    statistic, pval = kstest(chain, 'uniform')
    print(statistic, pval)
    assert statistic > 0.1
def test_multiple_startpoints():
    """Check that multiple start points map to the parallel chains."""
    problem = gaussian_problem()
    x0s = [np.array([0]), np.array([1])]
    sampler = sample.ParallelTemperingSampler(
        internal_sampler=sample.MetropolisSampler(),
        n_chains=2,
    )
    result = sample.sample(problem, n_samples=10, x0=x0s, sampler=sampler)

    # one chain per start point
    assert result.sample_result.trace_neglogpost.shape[0] == 2

    # each chain starts at its assigned start point.
    # BUGFIX(review): the original compared a list of ndarrays with `==`,
    # which only works by accident for size-1 arrays (larger arrays raise
    # "truth value is ambiguous"); np.array_equal is the robust check.
    assert np.array_equal(result.sample_result.trace_x[0][0], x0s[0])
    assert np.array_equal(result.sample_result.trace_x[1][0], x0s[1])
def sample_petab_problem():
    """Sample from a PEtab-derived problem and return the result."""
    petab_problem = create_petab_problem()

    return sample.sample(
        petab_problem,
        n_samples=1000,
        sampler=sample.AdaptiveMetropolisSampler(),
        x0=np.array([3, -4]),
        filename=None,
    )
def test_pipeline(sampler, problem):
    """Check that a typical pipeline runs through."""
    # NOTE(review): a second `test_pipeline` is defined later in this
    # module and shadows this one at import time — consider merging or
    # renaming one of them.
    # optimization; filename=None avoids writing a result file
    # (consistent with the other tests in this module)
    optimizer = optimize.ScipyOptimizer(options={'maxiter': 10})
    result = optimize.minimize(
        problem, n_starts=3, optimizer=optimizer, filename=None,
    )

    # sample
    result = sample.sample(
        problem, sampler=sampler, n_samples=100, result=result,
        filename=None,
    )

    # some plot
    visualize.sampling_1d_marginals(result)
    plt.close()
def test_geweke_test_unconverged():
    """Check that the geweke test reacts nicely to small sample numbers."""
    problem = gaussian_problem()
    sampler = sample.MetropolisSampler()

    # optimization; filename=None avoids writing a result file
    # (consistent with the other tests in this module)
    result = optimize.minimize(problem, n_starts=3, filename=None)

    # sample a chain that is too short to have converged
    result = sample.sample(
        problem, sampler=sampler, n_samples=100, result=result,
        filename=None,
    )

    # run geweke test (should not fail!)
    sample.geweke_test(result)
def test_prior():
    """Check that priors are defined for sampling."""
    # negative log posterior and negative log prior objectives
    posterior_fun = pypesto.Objective(fun=negative_log_posterior)
    prior_fun = pypesto.Objective(fun=negative_log_prior)

    # wrap the prior into a pypesto prior object and attach it
    prior_object = pypesto.NegLogPriors(objectives=[prior_fun])
    test_problem = pypesto.Problem(
        objective=posterior_fun,
        x_priors_defs=prior_object,
        lb=-10,
        ub=10,
        x_names=['x'],
    )

    result = sample.sample(
        test_problem,
        n_samples=1e4,
        sampler=sample.AdaptiveMetropolisSampler(),
        x0=np.array([0.0]),
        filename=None,
    )

    # the negative log prior trace of the first chain must not vanish,
    # since a prior was supplied
    logprior_trace = -result.sample_result.trace_neglogprior[0, :]
    assert (logprior_trace != 0.0).any()

    # compare the first chain against ground-truth draws
    samples = result.sample_result.trace_x[0, :, 0]
    rvs = norm.rvs(size=5000, loc=-1.0, scale=np.sqrt(0.7))
    statistic, pval = ks_2samp(rvs, samples)
    print(statistic, pval)
    assert statistic < 0.1
def test_samples_cis():
    """
    Test whether :py:func:`pypesto.sample.calculate_ci_mcmc_sample`
    produces percentile-based credibility intervals correctly.
    """
    problem = gaussian_problem()
    sampler = sample.MetropolisSampler()

    # optimize, then sample
    result = optimize.minimize(problem, n_starts=3, filename=None)
    result = sample.sample(
        problem,
        sampler=sampler,
        n_samples=2000,
        result=result,
        filename=None,
    )

    # determine burn-in via geweke and keep only the converged part
    sample.geweke_test(result)
    burn_in = result.sample_result.burn_in
    converged_chain = np.asarray(
        result.sample_result.trace_x[0, burn_in:, :])

    # check several confidence levels
    for alpha in (0.99, 0.95, 0.68):
        # credibility intervals from pypesto
        lb, ub = sample.calculate_ci_mcmc_sample(result, ci_level=alpha)

        # equivalent percentiles computed directly
        percentiles = 100 * np.array([(1 - alpha) / 2, 1 - (1 - alpha) / 2])
        expected = np.percentile(converged_chain, percentiles, axis=0)

        # both computations must agree exactly
        assert (expected - [lb, ub] == 0).all()
        # lower bound strictly below upper bound
        assert (lb < ub).all()
        # dimensions agree
        assert lb.shape == ub.shape
def test_pipeline(sampler, problem):
    """Check that a typical pipeline runs through."""
    # optimization
    result = optimize.minimize(
        problem,
        n_starts=3,
        optimizer=optimize.ScipyOptimizer(options={'maxiter': 10}),
        filename=None,
    )

    # sample
    result = sample.sample(
        problem,
        sampler=sampler,
        n_samples=100,
        result=result,
        filename=None,
    )

    # remove warnings in test/sample/test_sample.
    # Warning here: pypesto/visualize/sampling.py:1104
    # geweke test
    sample.geweke_test(result=result)

    # some plot
    visualize.sampling_1d_marginals(result)
    plt.close()
def test_storage_all():
    """Test `read_result` and `write_result`.

    It currently does not test read/write of the problem as this is
    known to not work completely. Also excludes testing the history
    key of an optimization result.
    """
    objective = pypesto.Objective(
        fun=so.rosen, grad=so.rosen_der, hess=so.rosen_hess)
    dim_full = 10
    lb = -5 * np.ones((dim_full, 1))
    ub = 5 * np.ones((dim_full, 1))
    n_starts = 5
    problem = pypesto.Problem(objective=objective, lb=lb, ub=ub)
    optimizer = optimize.ScipyOptimizer()

    # Optimization
    result = optimize.minimize(
        problem=problem,
        optimizer=optimizer,
        n_starts=n_starts,
        filename=None,
    )
    # Profiling
    result = profile.parameter_profile(
        problem=problem,
        result=result,
        profile_index=[0],
        optimizer=optimizer,
        filename=None,
    )
    # Sampling
    sampler = sample.AdaptiveMetropolisSampler()
    result = sample.sample(
        problem=problem,
        sampler=sampler,
        n_samples=100,
        result=result,
        filename=None,
    )

    # Read and write
    filename = 'test_file.hdf5'
    try:
        write_result(result=result, filename=filename)
        result_read = read_result(filename=filename)

        # test optimize
        for i, opt_res in enumerate(result.optimize_result.list):
            for key in opt_res:
                if key == 'history':
                    continue
                if isinstance(opt_res[key], np.ndarray):
                    np.testing.assert_array_equal(
                        opt_res[key],
                        result_read.optimize_result.list[i][key])
                else:
                    assert (opt_res[key]
                            == result_read.optimize_result.list[i][key])

        # test profile
        for key in result.profile_result.list[0][0].keys():
            # BUGFIX(review): the original guard checked
            # `result.profile_result.list[0][0].keys is None`, which
            # tests the bound method (always False); matching the sample
            # loop below, the intent is to skip entries whose value is
            # None.
            if (result.profile_result.list[0][0][key] is None
                    or key == 'time_path'):
                continue
            elif isinstance(result.profile_result.list[0][0][key],
                            np.ndarray):
                np.testing.assert_array_equal(
                    result.profile_result.list[0][0][key],
                    result_read.profile_result.list[0][0][key],
                )
            elif isinstance(result.profile_result.list[0][0][key], int):
                assert (result.profile_result.list[0][0][key]
                        == result_read.profile_result.list[0][0][key])

        # test sample
        for key in result.sample_result.keys():
            if result.sample_result[key] is None or key == 'time':
                continue
            elif isinstance(result.sample_result[key], np.ndarray):
                np.testing.assert_array_equal(
                    result.sample_result[key],
                    result_read.sample_result[key],
                )
            elif isinstance(result.sample_result[key], (float, int)):
                np.testing.assert_almost_equal(
                    result.sample_result[key],
                    result_read.sample_result[key],
                )
    finally:
        # always clean up the temporary HDF5 file
        if os.path.exists(filename):
            os.remove(filename)
def test_storage_sampling():
    """
    This test tests the saving and loading of samples
    into HDF5 through pypesto.store.SamplingResultHDF5Writer
    and pypesto.store.SamplingResultHDF5Reader. Tests all entries
    aside from time and message.
    """
    objective = pypesto.Objective(
        fun=so.rosen, grad=so.rosen_der, hess=so.rosen_hess)
    dim_full = 10
    lb = -5 * np.ones((dim_full, 1))
    ub = 5 * np.ones((dim_full, 1))
    n_starts = 5
    startpoints = pypesto.startpoint.latin_hypercube(
        n_starts=n_starts, lb=lb, ub=ub)
    problem = pypesto.Problem(
        objective=objective, lb=lb, ub=ub, x_guesses=startpoints)

    # optimize to obtain a sensible sampling start point
    result_optimization = optimize.minimize(
        problem=problem,
        optimizer=optimize.ScipyOptimizer(),
        n_starts=n_starts,
        filename=None,
    )
    x_0 = result_optimization.optimize_result.list[0]['x']

    sample_original = sample.sample(
        problem=problem,
        sampler=sample.AdaptiveParallelTemperingSampler(
            internal_sampler=sample.AdaptiveMetropolisSampler(),
            n_chains=1),
        n_samples=100,
        x0=[x_0],
        filename=None,
    )

    fn = 'test_file.hdf5'
    try:
        # write, then read back
        SamplingResultHDF5Writer(fn).write(sample_original)
        sample_read = SamplingResultHDF5Reader(fn).read()

        # every stored entry must round-trip unchanged
        for key in sample_original.sample_result.keys():
            original_value = sample_original.sample_result[key]
            if original_value is None or key == 'time':
                continue
            if isinstance(original_value, np.ndarray):
                np.testing.assert_array_equal(
                    original_value, sample_read.sample_result[key])
            elif isinstance(original_value, (float, int)):
                np.testing.assert_almost_equal(
                    original_value, sample_read.sample_result[key])
    finally:
        # always clean up the temporary HDF5 file
        if os.path.exists(fn):
            os.remove(fn)
def test_ground_truth_separated_modes():
    """Test whether we actually retrieve correct distributions."""
    # use best self-implemented sampler, which has a chance to correctly
    # sample from the distribution

    # First use parallel tempering with 3 chains
    problem = gaussian_mixture_separated_modes_problem()
    result = sample.sample(
        problem,
        n_samples=1e4,
        sampler=sample.AdaptiveParallelTemperingSampler(
            internal_sampler=sample.AdaptiveMetropolisSampler(),
            n_chains=3),
        x0=np.array([0.0]),
        filename=None,
    )
    samples = result.sample_result.trace_x[0, :, 0]

    # generate bimodal ground-truth samples:
    # "first" mode centered at -1, "second" mode centered at 100
    rvs1 = norm.rvs(size=5000, loc=-1.0, scale=np.sqrt(0.7))
    rvs2 = norm.rvs(size=5001, loc=100.0, scale=np.sqrt(0.8))
    rvs_both = np.concatenate([rvs1, rvs2])

    # only parallel tempering finds both modes
    statistic, pval = ks_2samp(rvs_both, samples)
    print(statistic, pval)
    assert statistic < 0.2

    # sample using adaptive metropolis (single-chain),
    # initiated around the "first" mode of the distribution
    result = sample.sample(
        problem,
        n_samples=1e4,
        sampler=sample.AdaptiveMetropolisSampler(),
        x0=np.array([-2.0]),
        filename=None,
    )
    samples = result.sample_result.trace_x[0, :, 0]

    # single-chain adaptive metropolis does not find both modes ...
    statistic, pval = ks_2samp(rvs_both, samples)
    print(statistic, pval)
    assert statistic > 0.1

    # ... it is actually centered at the "first" mode
    statistic, pval = ks_2samp(rvs1, samples)
    print(statistic, pval)
    assert statistic < 0.1

    # sample using adaptive metropolis (single-chain),
    # initiated around the "second" mode of the distribution
    result = sample.sample(
        problem,
        n_samples=1e4,
        sampler=sample.AdaptiveMetropolisSampler(),
        x0=np.array([120.0]),
        filename=None,
    )
    samples = result.sample_result.trace_x[0, :, 0]

    # single-chain adaptive metropolis does not find both modes ...
    statistic, pval = ks_2samp(rvs_both, samples)
    print(statistic, pval)
    assert statistic > 0.1

    # ... it is actually centered at the "second" mode
    statistic, pval = ks_2samp(rvs2, samples)
    print(statistic, pval)
    assert statistic < 0.1