def test_pca_chain_take_step(line_posterior):
    """Check that one ``take_step`` call lengthens the chain by exactly one.

    The step counter ``n``, the stored samples of the first parameter and
    the stored probabilities must all agree on the new length.
    """
    sampler = PcaChain(posterior=line_posterior, start=[0.5, 0.1])
    expected_length = sampler.n + 1

    sampler.take_step()

    assert sampler.n == expected_length
    assert len(sampler.params[0].samples) == sampler.n
    assert len(sampler.probs) == sampler.n
def test_pca_chain_advance(line_posterior):
    """Check that ``advance(k)`` lengthens the chain by exactly ``k`` steps.

    The step counter ``n``, the stored samples of the first parameter and
    the stored probabilities must all agree on the new length.
    """
    sampler = PcaChain(posterior=line_posterior, start=[0.5, 0.1])
    n_before, n_steps = sampler.n, 104

    sampler.advance(n_steps)

    assert sampler.n == n_before + n_steps
    assert len(sampler.params[0].samples) == sampler.n
    assert len(sampler.probs) == sampler.n
def test_pca_chain_restore(line_posterior, tmp_path):
    """Check that a saved chain is recovered by ``PcaChain.load``.

    A chain is advanced, written to an ``.npz`` file inside ``tmp_path`` and
    read back; the restored chain must report the same length, the same
    probabilities and the same last sample as the original.
    """
    original = PcaChain(posterior=line_posterior, start=[0.5, 0.1])
    original.advance(200)

    save_path = tmp_path / "restore_file.npz"
    original.save(save_path)
    restored = PcaChain.load(save_path)

    assert restored.n == original.n
    assert restored.probs == original.probs
    # get_last() is compared element-wise, hence the all(...)
    assert all(restored.get_last() == original.get_last())
# samples from the prior directly using the sample() method: prior_sample = prior.sample() print(prior_sample) # The likelihood and prior can be easily combined into a posterior distribution # using the Posterior class: from inference.posterior import Posterior posterior = Posterior(likelihood=likelihood, prior=prior) # Now we have constructed a posterior distribution, we can sample from it # using Markov-chain Monte-Carlo (MCMC). # The inference.mcmc module contains implementations of various MCMC sampling algorithms. # Here we import the PcaChain class and use it to create a Markov-chain object: from inference.mcmc import PcaChain chain = PcaChain(posterior=posterior, start=initial_guess) # We generate samples by advancing the chain by a chosen number of steps using the advance method: chain.advance(25000) # we can check the status of the chain using the plot_diagnostics method: chain.plot_diagnostics() # The burn-in (how many samples from the start of the chain are discarded) # can be chosen by setting the burn attribute of the chain object: chain.burn = 5000 # we can get a quick overview of the posterior using the matrix_plot method # of chain objects, which plots all possible 1D & 2D marginal distributions # of the full parameter set (or a chosen sub-set). chain.matrix_plot(labels=['area', 'width', 'center', 'background'])
# plot the simulated data we're going to use plt.errorbar(x_data, y_data, errors, marker='D', ls='none', markersize=4) plt.plot(x_data, y_data, alpha=0.5, c='C0', ls='dashed') plt.title('synthetic spectroscopy data') plt.xlabel('wavelength (nm)') plt.ylabel('intensity') plt.grid() plt.show() # create the posterior object posterior = GaussianLikelihood(y_data=y_data, sigma=errors, forward_model=model) # create the markov chain object chain = PcaChain(posterior=posterior, start=[600, 1, 600, 1, 15]) # generate a sample by advancing the chain chain.advance(20000) # we can check the status of the chain using the plot_diagnostics method chain.plot_diagnostics() # We can automatically set sensible burn and thin values for the sample chain.autoselect_burn() chain.autoselect_thin() # we can get a quick overview of the posterior using the matrix_plot # functionality of chain objects, which plots all possible 1D & 2D # marginal distributions of the full parameter set (or a chosen sub-set). chain.matrix_plot()
# plot the simulated data and write the figure to disk instead of showing it
plt.errorbar(x_data, y_data, errors, marker='D', ls='none', markersize=4)
plt.plot(x_data, y_data, alpha=0.5, c='C0', ls='dashed')
plt.title('synthetic spectroscopy data')
plt.xlabel('wavelength (nm)')
plt.ylabel('intensity')
plt.grid()
plt.tight_layout()
plt.savefig('spectroscopy_data.png')
plt.close()
print(' # spectroscopy data plot finished')

# create the posterior object
posterior = SpectroPosterior(x_data, y_data, errors)

# create the markov chain object
chain = PcaChain(posterior=posterior, start=[1000, 1, 1000, 1, 20])

# generate a sample by advancing the chain
chain.advance(50000)

# we can check the status of the chain using the plot_diagnostics method
# (the figure is saved to file rather than displayed)
chain.plot_diagnostics(show=False, filename='plot_diagnostics_example.png')
print(' # diagnostics plot finished')

# We can automatically set sensible burn and thin values for the sample
chain.autoselect_burn_and_thin()

# we can get a quick overview of the posterior using the matrix_plot
# functionality of chain objects, which plots all possible 1D & 2D
# marginal distributions of the full parameter set (or a chosen sub-set).
# NOTE(review): thin is set to 1 here, presumably overriding the thin value
# chosen by autoselect_burn_and_thin() above - confirm this is intended.
chain.thin = 1
labels = [
return a*x**2 + b*x + c def __call__(self, theta): prediction = self.forward(self.x, theta) return -0.5*(((prediction-self.y)/self.sigma)**2).sum() seed(4) x = linspace(1, 9, 9) start = [-0.5,4.,30.] y = HdiPosterior.forward(x, start) s = y*0.1 + 2 y += normal(size=len(y))*s p = HdiPosterior(x,y,s) chain = PcaChain(posterior=p, start = start)#, parameter_boundaries=[(0,200),(0.1,10),(0.1,15)]) # chain = GibbsChain(posterior=p, start = [1.,1.,5.])#, parameter_boundaries=[(0,200),(0.1,10),(0.1,15)]) chain.advance(105000) chain.burn = 5000 chain.thin = 2 # chain.plot_diagnostics() # chain.trace_plot() # chain.matrix_plot() x_fits = linspace(0,10,100) sample = chain.get_sample() # pass each through the forward model curves = array([HdiPosterior.forward(x_fits, theta) for theta in sample])
For an N-parameter problem, PcaChain produces a new sample by making N sequential 1D Metropolis-Hastings steps in the direction of each of the N eigenvectors of the NxN covariance matrix. As an initial guess the covariance matrix is taken to be diagonal, which results in standard Gibbs sampling for the first samples in the chain. Subsequently, the covariance matrix is periodically updated with an estimate derived from the sample itself, and the eigenvectors are re-calculated. """ # create our posterior with two highly-correlated parameters posterior = CorrelatedLinePosterior() # create a PcaChain, and also a GibbsChain for comparison pca = PcaChain(posterior=posterior, start=[-1, 1, -1]) gibbs = GibbsChain(posterior=posterior, start=[-1, 1, -1]) # advance both chains for the same amount of samples pca.advance(50000) gibbs.advance(50000) # get an estimate of the marginal distribution of one of the correlated parameters pca_pdf = pca.get_marginal(2, burn=5000) gibbs_pdf = gibbs.get_marginal(2, burn=5000) # over-plot the marginal estimates to compare the performance marginal_axis = linspace(-4, 2, 500) plt.plot(marginal_axis, pca_pdf(marginal_axis), lw=2,