def _update_results(self, samples, weights):
    if self.log:
        self.logger.info('Likelihood function evaluations: %d', self.ncall)

    integral_estimator = weights.sum() / len(weights)
    integral_uncertainty_estimator = np.sqrt(
        (weights**2).sum() / len(weights) - integral_estimator**2) / np.sqrt(len(weights) - 1)
    logZ = np.log(integral_estimator)
    logZerr = np.log(integral_estimator + integral_uncertainty_estimator) - logZ
    ess_fraction = ess(weights)

    # get a decent accuracy based on the weights, and not too few samples
    Nsamples = int(max(400, ess_fraction * len(weights) * 40))
    eqsamples_u = resample_equal(samples, weights / weights.sum(), N=Nsamples)
    eqsamples = np.asarray([self.transform(u) for u in eqsamples_u])

    results = dict(
        z=integral_estimator * np.exp(self.Loffset),
        zerr=integral_uncertainty_estimator * np.exp(self.Loffset),
        logz=logZ + self.Loffset,
        logzerr=logZerr,
        ess=ess_fraction,
        paramnames=self.paramnames,
        ncall=int(self.ncall),
        posterior=dict(
            mean=eqsamples.mean(axis=0).tolist(),
            stdev=eqsamples.std(axis=0).tolist(),
            median=np.percentile(eqsamples, 50, axis=0).tolist(),
            errlo=np.percentile(eqsamples, 15.8655, axis=0).tolist(),
            errup=np.percentile(eqsamples, 84.1345, axis=0).tolist(),
        ),
        samples=eqsamples,
    )
    self.results = results
    return results
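# Illustrative sketch (an assumption, not the library code): the ess() and
# resample_equal() helpers used above are not defined in this excerpt. The
# functions below show one common way to compute the effective-sample-size
# fraction of importance weights and to resample weighted samples onto equal
# weights; the actual implementations may differ in detail.
import numpy as np

def ess_fraction_sketch(weights):
    """Kish effective sample size as a fraction of N: (sum w)^2 / (N sum w^2)."""
    w = np.asarray(weights, dtype=float)
    return w.sum()**2 / (len(w) * (w**2).sum())

def resample_equal_sketch(samples, norm_weights, N, rng=np.random):
    """Draw N equally weighted samples by multinomial resampling."""
    indices = rng.choice(len(samples), size=N, p=norm_weights)
    return np.asarray(samples)[indices]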
chain_groups = len(long_patches) // 15  # components per group = 15

# form first proposal with Variational Bayes
# run variational Bayes on the samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100],
                                                   initial_guess=long_patches)
print('running VB1...')
vb.run(1000, abs_tol=1e-5, rel_tol=1e-10, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()

# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=False)
vb_weighted_samples = vb_sampler.history[-1]
vb_weights = vb_weighted_samples[:, 0]

vb_perp = perp(vb_weights)
vb_ess = ess(vb_weights)
components_vb = len(vbmix)
print('VB1 done')

print('running VB2...')
prior_for_vb2 = vb.posterior2prior()
prior_for_vb2.pop('alpha0')
vb2 = pypmc.mix_adapt.variational.GaussianInference(vb_weighted_samples[:vb2_N, 1:],
                                                    weights=vb_weights[:vb2_N],
                                                    initial_guess=vbmix,
                                                    **prior_for_vb2)
vb2.run(1000, abs_tol=1e-5, rel_tol=1e-10, verbose=True)
vb2mix = vb2.make_mixture()

vb2_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vb2mix)
vb2_sampler.run(N_perp_ess, trace_sort=False)
vb2_weighted_samples = vb2_sampler.history[-1]
vb2_weights = vb2_weighted_samples[:, 0]
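# Note on the data layout assumed above (as used throughout these scripts; it
# may depend on the pypmc version): sampler.history[-1] is an (N, 1 + dim)
# array whose first column holds the importance weights and whose remaining
# columns hold the sampled points; vb2_N is a run parameter defined elsewhere.
# A minimal sketch of splitting such an array:
weighted = vb_weighted_samples      # shape (N_perp_ess, 1 + dim)
is_weights = weighted[:, 0]         # importance weights
is_points = weighted[:, 1:]         # sampled parameter vectors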
# ***********************************************************************
# ****************** nothing below should be changed ! ******************
# ***********************************************************************

# use importance sampling to calculate perplexity and effective sample size

# define an ImportanceSampler object using ``final_proposal``
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, final_proposal)

# run N_perp_ess steps
sampler.run(N_perp_ess, trace_sort=True)

# get the weights from the samples that have just been generated
weighted_samples = sampler.history[-1]
weights = weighted_samples[:, 0]

# calculate perplexity and ess
perplexity = perp(weights)
ess = ess(weights)

# save perplexity and ess
params.update([('perplexity', perplexity), ('ess', ess)])

# dump results
save_final_proposal(final_proposal, params)
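# Illustrative sketch (an assumption): perp() above is expected to return the
# normalized perplexity of the importance weights, exp(H)/N with H the Shannon
# entropy of the normalized weights; a value near 1 means nearly uniform
# weights. The actual helper (e.g. pypmc.tools.convergence.perp) may differ in
# detail.
import numpy as np

def perplexity_sketch(weights):
    w = np.asarray(weights, dtype=float)
    wn = w / w.sum()                                   # normalized weights
    entropy = -np.sum(wn[wn > 0] * np.log(wn[wn > 0]))
    return np.exp(entropy) / len(w)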
# form first proposal with PMC
print('running PMC')
pmcmix = pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], long_patches, copy=True)
pmcmix.prune(0.5 / len(long_patches))
for i in range(1000 - 1):
    print(i)
    pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], pmcmix, copy=False)
    pmcmix.prune(0.5 / len(long_patches))

pmc_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, pmcmix)
pmc_sampler.run(N_perp_ess, trace_sort=True)
pmc_weighted_samples = pmc_sampler.history[-1]
pmc_weights = pmc_weighted_samples[:, 0]

pmc_perp = perp(pmc_weights)
pmc_ess = ess(pmc_weights)
components_pmc = len(pmcmix)
print('PMC done')

# form first proposal with Variational Bayes
# run variational Bayes on the samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100],
                                                   initial_guess=long_patches)
print('running VB...')
vb.run(1000, abs_tol=1e-5, rel_tol=1e-10, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()

# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=True)
vb_weighted_samples = vb_sampler.history[-1]
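# Optional variant (a sketch, not part of the original script) of the PMC
# update loop earlier in this block: rather than a fixed number of updates,
# stop once the component weights of pmcmix stabilize. Uses only objects
# already defined above.
previous_weights = pmcmix.weights.copy()
for i in range(1000 - 1):
    pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], pmcmix, copy=False)
    pmcmix.prune(0.5 / len(long_patches))
    if len(pmcmix.weights) == len(previous_weights) and \
            np.allclose(pmcmix.weights, previous_weights, atol=1e-10):
        print('PMC component weights converged after %d updates' % (i + 1))
        break
    previous_weights = pmcmix.weights.copy()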
stacked_data = np.vstack(mcmc_data)
print('Markov Chains done')

# form the "long_patches"
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mcmc_data)
chain_groups = len(long_patches) // 15  # components per group = 15

# form first proposal with Variational Bayes
# run variational Bayes on the samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100],
                                                   initial_guess=long_patches)
print('running VB...')
vb.run(1000, abs_tol=1e-5, rel_tol=1e-5, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()

# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=True)
vb_weighted_samples = vb_sampler.history[-1]
vb_weights = vb_weighted_samples[:, 0]

vb_perp = perp(vb_weights)
vb_ess = ess(vb_weights)
components_vb = len(vbmix)
print('VB done')

# append this run's results to the output file
with open('calculate_perp_ess_VB_loose_convergence.txt', 'a') as outfile:
    outfile.write('\n%s %s %s %s' % (chain_groups, components_vb, vb_perp, vb_ess))
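# Sketch of the assumed input (not part of the original script): mcmc_data is
# expected to be a list of per-chain sample arrays, each of shape
# (n_steps, dim). make_r_gaussmix groups chains whose R values agree and, by
# default, places 15 Gaussian components on each group, which is why
# chain_groups above divides by 15. A toy example:
example_mcmc_data = [np.random.randn(10000, 2) + offset
                     for offset in ([0., 0.], [0.1, 0.1], [5., 5.])]
example_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(example_mcmc_data)
print('%d components in total' % len(example_patches))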
def run_iter(
        self,
        num_gauss_samples=400,
        max_ncalls=100000,
        min_ess=400,
        max_improvement_loops=4,
        heavytail_laplaceapprox=True,
        verbose=True,
):
    """Iterative version of run(). See documentation there.

    Yields the current results on each iteration.
    """
    paramnames = self.paramnames
    loglike = self.loglike
    transform = self.transform
    ndim = len(paramnames)

    optu, cov, invcov = self.optu, self.cov, self.invcov
    # for numerical stability, use 1e260, so that we can go down by 1e-100,
    # but up by 1e600
    self.Loffset = self.optL  # + 600

    # first iteration: create a single gaussian and importance-sample
    if self.log:
        self.logger.info("Initiating gaussian importance sampler")

    def log_target(u):
        """log-posterior to sample from"""
        if (u > 1).any() or (u < 0).any():
            return -np.inf
        p = transform(u)
        L = loglike(p)
        return L - self.Loffset

    if not heavytail_laplaceapprox:
        initial_proposal = Gauss(optu, cov)
    else:
        # make a few gaussians, in case the fit errors were too narrow
        means, covs, weights = _make_initial_proposal(optu, cov)
        initial_proposal = create_gaussian_mixture(means, covs, weights)

    mixes = [initial_proposal]

    N = num_gauss_samples
    Nhere = N // self.mpi_size
    if self.mpi_size > 1:
        SequentialIS = ImportanceSampler
        from pypmc.tools.parallel_sampler import MPISampler
        sampler = MPISampler(SequentialIS, target=log_target, proposal=initial_proposal, prealloc=Nhere)
    else:
        sampler = ImportanceSampler(target=log_target, proposal=initial_proposal, prealloc=Nhere)

    if self.log:
        self.logger.info(" sampling %d ..." % N)
    np.seterr(over="warn")
    sampler.run(Nhere)
    self.ncall += Nhere * self.mpi_size

    samples, weights = self._collect_samples(sampler)
    assert weights.sum() > 0, 'All samples have weight zero.'

    vbmix = None
    for it in range(max_improvement_loops):
        ess_fraction = ess(weights)
        if self.log:
            self.logger.info(" sampling efficiency: %.3f%%" % (ess_fraction * 100))

        if it % 3 == 0:
            if self.log:
                self.logger.info("Optimizing proposal (from scratch) ...")
            mix = _make_proposal(samples, weights, optu, cov, invcov)
            vb = GaussianInference(samples, weights=weights, initial_guess=mix,
                                   W0=np.eye(ndim) * 1e10)
            vb_prune = 0.5 * len(vb.data) / vb.K
        else:
            if self.log:
                self.logger.info("Optimizing proposal (from previous) ...")
            prior_for_proposal_update = vb.posterior2prior()
            prior_for_proposal_update.pop('alpha0')
            vb = GaussianInference(samples, initial_guess=vbmix, weights=weights,
                                   **prior_for_proposal_update)

        if self.log:
            self.logger.info(' running variational Bayes ...')
        vb.run(1000, rel_tol=1e-8, abs_tol=1e-5, prune=vb_prune, verbose=False)
        vbmix = vb.make_mixture()
        if self.log:
            self.logger.info(' reduced from %d to %d components'
                             % (len(mix.components), len(vbmix.components)))

        sampler.proposal = vbmix

        if self.log:
            self.logger.info("Importance sampling %d ..." % N)
        sampler.run(N // self.mpi_size)
        self.ncall += (N // self.mpi_size) * self.mpi_size
        mixes.append(vbmix)

        samples, weights = self._collect_samples(sampler)
        ess_fraction = ess(weights)
        if self.log:
            self.logger.debug(" sampling efficiency: %.3f%%" % (ess_fraction * 100))
            self.logger.debug(" obtained %.0f new effective samples"
                              % (ess_fraction * len(weights)))

        samples, weights = self._collect_samples(sampler, all=True, mixes=mixes)
        ess_fraction = ess(weights)
        Ndone = ess_fraction * len(weights)
        result = self._update_results(samples, weights)
        if Ndone >= min_ess:
            if self.log:
                self.logger.info(
                    "Status: Have %d total effective samples, done." % Ndone)
            yield result
            break
        elif self.ncall > max_ncalls:
            if self.log:
                self.logger.info(
                    "Status: Have %d total effective samples, reached max number of calls."
                    % Ndone)
            yield result
            break
        else:
            N = int(1.4 * min(max_ncalls - self.ncall, N))
            if self.log:
                self.logger.info(
                    "Status: Have %d total effective samples, sampling %d next."
                    % (Ndone, N))
            yield result
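# Usage sketch (hypothetical; the class owning run_iter is not shown in this
# excerpt): run_iter() is a generator that yields the current result
# dictionary after each improvement loop, so a caller can monitor the evidence
# estimate and stop early. `integrator` stands for an instance of that class.
for result in integrator.run_iter(num_gauss_samples=400, min_ess=400,
                                  max_improvement_loops=4):
    print('logZ = %.2f +- %.2f (sampling efficiency %.1f%%)'
          % (result['logz'], result['logzerr'], result['ess'] * 100))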
chain_groups = len(long_patches) // 5  # components per group = 5

# for hierarchical clustering (and VBMerge), form "short_patches"
short_patches = pypmc.tools.patch_data(stacked_data, L=100)

# run hierarchical clustering
hc = pypmc.mix_adapt.hierarchical.Hierarchical(short_patches, long_patches, verbose=True)
print('running HC...')
hc.run()
hcmix = hc.g

hc_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, hcmix)
hc_sampler.run(N_perp_ess, trace_sort=True)
hc_weighted_samples = hc_sampler.history[-1]
hc_weights = hc_weighted_samples[:, 0]

hc_perp = perp(hc_weights)
hc_ess = ess(hc_weights)
components_hc = len(hcmix)
print('HC done')

# importance sampling main loop
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, hcmix)
previous_perp = -np.inf
converge_step = None
for i in range(25):
    print('step', i)

    # run len(sampler.proposal) * N_c steps
    latent = sampler.run(N_c * len(sampler.proposal), trace_sort=True)
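# Sketch (not the original continuation of the loop above): previous_perp and
# converge_step suggest a perplexity-based stopping rule, analogous to the one
# used in the PMC loop later in this section, e.g. checked after each proposal
# update inside the loop:
#
#     current_perp = perp(sampler.history[-1][:, 0])
#     if (current_perp - previous_perp) / current_perp < 0.05:
#         converge_step = i
#         break
#     previous_perp = current_perp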
for i in range(10):
    print('step', i)

    # run N_c steps per live component
    generating_components.append(
        sampler.run(N_c * (sampler.proposal.weights != 0).sum(), trace_sort=True))

    # get the weighted samples that have just been generated
    weighted_samples = sampler.history[-1]

    perplexities.append(perp(weighted_samples[:, 0]))
    if (perplexities[i + 1] - perplexities[i]) / perplexities[i + 1] < .05:
        statusfile.write('PMC converged in step ' + str(i) + '\n')
        break

    # update the proposal using PMC
    pypmc.mix_adapt.pmc.gaussian_pmc(weighted_samples[:, 1:], sampler.proposal,
                                     weights=weighted_samples[:, 0],
                                     latent=generating_components[-1],
                                     rb=True, mincount=20, copy=False)

    # plot
    plt.figure()
    plt.title('proposal after PMC update ' + str(i))
    plot_mixture(sampler.proposal, 0, 1, cutoff=.01)
    plotfile.savefig()

    statusfile.write('have %i live components PMC step %i\n'
                     % ((sampler.proposal.weights != 0).sum(), i))

statusfile.write('Perplexities:\n')
for perplexity in perplexities[1:]:  # skip the initial 0 entry
    statusfile.write('%s\n' % perplexity)

statusfile.write('Effective sample sizes:\n')
for ws in sampler.history:
    statusfile.write('%s\n' % ess(ws[:, 0]))

statusfile.close()
plotfile.close()
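# Note on the assumed setup (not shown in this excerpt): the loop above
# expects `plotfile` to be a matplotlib PdfPages object, `statusfile` an open
# text file, `plot_mixture` the pypmc plotting helper, and the bookkeeping
# lists to be pre-initialized, e.g.:
#
#     import matplotlib.pyplot as plt
#     from matplotlib.backends.backend_pdf import PdfPages
#     from pypmc.tools import plot_mixture
#     plotfile = PdfPages('pmc_proposals.pdf')
#     statusfile = open('pmc_status.txt', 'w')
#     generating_components = []
#     perplexities = [0.]   # leading 0 so the convergence check has a baseline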