Example #1
    def _update_results(self, samples, weights):
        if self.log:
            self.logger.info('Likelihood function evaluations: %d', self.ncall)

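        # importance-sampling estimate of the evidence: Z is the mean of the
        # weights; the uncertainty below is the standard error of that mean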
        integral_estimator = weights.sum() / len(weights)
        integral_uncertainty_estimator = np.sqrt(
            (weights**2).sum() / len(weights) -
            integral_estimator**2) / np.sqrt(len(weights) - 1)

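        # one-sigma error in log space: log(Z + sigma) - log Z = log(1 + sigma/Z)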
        logZ = np.log(integral_estimator)
        logZerr = np.log(integral_estimator +
                         integral_uncertainty_estimator) - logZ
        ess_fraction = ess(weights)

        # get a decent accuracy based on the weights, and not too few samples
        Nsamples = int(max(400, ess_fraction * len(weights) * 40))
        eqsamples_u = resample_equal(samples,
                                     weights / weights.sum(),
                                     N=Nsamples)
        eqsamples = np.asarray([self.transform(u) for u in eqsamples_u])

        results = dict(
            z=integral_estimator * np.exp(self.Loffset),
            zerr=integral_uncertainty_estimator * np.exp(self.Loffset),
            logz=logZ + self.Loffset,
            logzerr=logZerr,
            ess=ess_fraction,
            paramnames=self.paramnames,
            ncall=int(self.ncall),
            posterior=dict(
                mean=eqsamples.mean(axis=0).tolist(),
                stdev=eqsamples.std(axis=0).tolist(),
                median=np.percentile(eqsamples, 50, axis=0).tolist(),
                errlo=np.percentile(eqsamples, 15.8655, axis=0).tolist(),
                errup=np.percentile(eqsamples, 84.1345, axis=0).tolist(),
            ),
            samples=eqsamples,
        )
        self.results = results
        return results
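
Example #1 relies on resample_equal to draw equally weighted posterior samples, but the helper is not shown in the snippet. A minimal sketch of such an equal-weight (systematic) resampler follows; this is an assumption about its behavior, not the exact helper the snippet imports:

import numpy as np

def resample_equal(samples, weights, N):
    # draw N equally weighted samples via systematic resampling:
    # place N evenly spaced positions on the cumulative weight distribution
    positions = (np.random.random() + np.arange(N)) / N
    cumulative = np.cumsum(weights)
    cumulative[-1] = 1.0  # guard against floating-point round-off
    return samples[np.searchsorted(cumulative, positions)]
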
Example #2
chain_groups = len(long_patches) // 15  # components per group = 15

# form first proposal with Variational Bayes

# run variational bayes with samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print('running VB1...')
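# prune: remove components whose effective number of samples drops below
# half the average, 0.5 * N / K (an interpretation of the prune argument below)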
vb.run(1000, abs_tol=1e-5, rel_tol=1e-10, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()
# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=False)
vb_weighted_samples = vb_sampler.history[-1]
vb_weights = vb_weighted_samples[:,0]
vb_perp = perp(vb_weights)
vb_ess = ess(vb_weights)
components_vb = len(vbmix)
print('VB1 done')


print('running VB2...')
prior_for_vb2 = vb.posterior2prior()
prior_for_vb2.pop('alpha0')
vb2 = pypmc.mix_adapt.variational.GaussianInference(vb_weighted_samples[:vb2_N,1:], weights=vb_weights[:vb2_N],
                                                    initial_guess=vbmix, **prior_for_vb2)
vb2.run(1000, abs_tol=1e-5, rel_tol=1e-10, verbose=True)
vb2mix = vb2.make_mixture()
vb2_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vb2mix)
vb2_sampler.run(N_perp_ess, trace_sort=False)
vb2_weighted_samples = vb2_sampler.history[-1]
vb2_weights = vb2_weighted_samples[:,0]
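
These snippets use perp and ess, presumably imported from pypmc.tools.convergence: the normalized perplexity and effective sample size of the importance weights. A minimal sketch of the quantities being computed (the standard normalized definitions; not pypmc's exact code):

import numpy as np

def perp(weights):
    # normalized perplexity: exp(entropy of the normalized weights) / N, in (0, 1]
    w = weights / weights.sum()
    entropy = -np.sum(np.where(w > 0, w * np.log(w), 0.0))
    return np.exp(entropy) / len(w)

def ess(weights):
    # normalized effective sample size: 1 / (N * sum of squared normalized weights)
    w = weights / weights.sum()
    return 1.0 / (len(w) * np.sum(w * w))
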
Example #3
# ***********************************************************************
# ****************** nothing below should be changed ! ******************
# ***********************************************************************

# use importance sampling to calculate perplexity and effective sample size

# define an ImportanceSampler object using ``reduced_proposal``
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, final_proposal)

# run N_perp_ess steps
sampler.run(N_perp_ess, trace_sort=True)

# get the weights from the samples that have just been generated
weighted_samples = sampler.history[-1]
weights = weighted_samples[:,0]

# calculate perplexity and ess
perplexity = perp(weights)
ess_value = ess(weights)  # renamed: do not shadow the ess() function

# save perplexity and ess
params.update([('perplexity', perplexity), ('ess', ess_value)])

# dump results
save_final_proposal(final_proposal, params)

Example #4
# form first proposal with PMC
print "running PMC"
pmcmix = pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], long_patches, copy=True)
pmcmix.prune(0.5 / len(long_patches))
for i in range(1000 - 1):
    print(i)
    pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], pmcmix, copy=False)
    pmcmix.prune(0.5 / len(long_patches))
pmc_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, pmcmix)
pmc_sampler.run(N_perp_ess, trace_sort=True)
pmc_weighted_samples = pmc_sampler.history[-1]
pmc_weights = pmc_weighted_samples[:, 0]
pmc_perp = perp(pmc_weights)
pmc_ess = ess(pmc_weights)
components_pmc = len(pmcmix)
print "PMC done"


# form first proposal with Variational Bayes

# run variational bayes with samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print "running VB..."
vb.run(1000, abs_tol=1e-5, rel_tol=1e-10, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()
# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=True)
vb_weighted_samples = vb_sampler.history[-1]
Example #5
stacked_data = np.vstack(mcmc_data)

print('Markov Chains done')


# form the "long_patches"
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mcmc_data)
chain_groups = len(long_patches) // 15  # components per group = 15


# form first proposal with Variational Bayes

# run variational bayes with samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print('running VB...')
vb.run(1000, abs_tol=1e-5, rel_tol=1e-5, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()
# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=True)
vb_weighted_samples = vb_sampler.history[-1]
vb_weights = vb_weighted_samples[:,0]
vb_perp = perp(vb_weights)
vb_ess = ess(vb_weights)
components_vb = len(vbmix)
print('VB done')
# append this run's results to output file
with open('calculate_perp_ess_VB_loose_convergence.txt', 'a') as outfile:
    outfile.write('\n' + str(chain_groups) + ' ' + str(components_vb) + ' ' + str(vb_perp) + ' ' + str(vb_ess))
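
Each run appends one whitespace-separated row, so the accumulated results can be read back with NumPy (a usage sketch, assuming the format written above):

import numpy as np

# columns: chain_groups, components_vb, vb_perp, vb_ess
runs = np.loadtxt('calculate_perp_ess_VB_loose_convergence.txt')
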
Example #6
    def run_iter(
        self,
        num_gauss_samples=400,
        max_ncalls=100000,
        min_ess=400,
        max_improvement_loops=4,
        heavytail_laplaceapprox=True,
        verbose=True,
    ):
        """
        Iterative version of run(). See documentation there.
        Returns current samples on each iteration.
        """
        paramnames = self.paramnames
        loglike = self.loglike
        transform = self.transform

        ndim = len(paramnames)
        optu, cov, invcov = self.optu, self.cov, self.invcov
        # for numerical stability, use 1e260, so that we can go down by 1e-100,
        # but up by 1e600
        self.Loffset = self.optL  #+ 600

        # first iteration: create a single gaussian and importance-sample
        if self.log:
            self.logger.info("Initiating gaussian importance sampler")

        def log_target(u):
            """ log-posterior to sample from """
            if (u > 1).any() or (u < 0).any():
                return -np.inf
            p = transform(u)
            L = loglike(p)
            return L - self.Loffset

        if not heavytail_laplaceapprox:
            initial_proposal = Gauss(optu, cov)
        else:
            # make a few gaussians, in case the fit errors were too narrow
            means, covs, weights = _make_initial_proposal(optu, cov)
            initial_proposal = create_gaussian_mixture(means, covs, weights)

        mixes = [initial_proposal]

        N = num_gauss_samples
        Nhere = N // self.mpi_size
        if self.mpi_size > 1:
            SequentialIS = ImportanceSampler
            from pypmc.tools.parallel_sampler import MPISampler
            sampler = MPISampler(SequentialIS,
                                 target=log_target,
                                 proposal=initial_proposal,
                                 prealloc=Nhere)
        else:
            sampler = ImportanceSampler(target=log_target,
                                        proposal=initial_proposal,
                                        prealloc=Nhere)

        if self.log:
            self.logger.info("    sampling %d ..." % N)
        np.seterr(over="warn")
        sampler.run(Nhere)
        self.ncall += Nhere * self.mpi_size

        samples, weights = self._collect_samples(sampler)
        assert weights.sum() > 0, 'All samples have weight zero.'

        vbmix = None
        for it in range(max_improvement_loops):
            ess_fraction = ess(weights)
            if self.log:
                self.logger.info("    sampling efficiency: %.3f%%" %
                                 (ess_fraction * 100))

            if it % 3 == 0:
                if self.log:
                    self.logger.info("Optimizing proposal (from scratch) ...")
                mix = _make_proposal(samples, weights, optu, cov, invcov)
                vb = GaussianInference(samples,
                                       weights=weights,
                                       initial_guess=mix,
                                       W0=np.eye(ndim) * 1e10)
                vb_prune = 0.5 * len(vb.data) / vb.K
            else:
                if self.log:
                    self.logger.info("Optimizing proposal (from previous) ...")
                prior_for_proposal_update = vb.posterior2prior()
                prior_for_proposal_update.pop('alpha0')
                vb = GaussianInference(samples,
                                       initial_guess=vbmix,
                                       weights=weights,
                                       **prior_for_proposal_update)

            if self.log:
                self.logger.info('    running variational Bayes ...')
            vb.run(1000,
                   rel_tol=1e-8,
                   abs_tol=1e-5,
                   prune=vb_prune,
                   verbose=False)
            vbmix = vb.make_mixture()
            if self.log:
                self.logger.info('    reduced from %d to %d components' %
                                 (len(mix.components), len(vbmix.components)))

            sampler.proposal = vbmix

            if self.log:
                self.logger.info("Importance sampling %d ..." % N)
            sampler.run(N // self.mpi_size)
            self.ncall += (N // self.mpi_size) * self.mpi_size
            mixes.append(vbmix)

            samples, weights = self._collect_samples(sampler)
            ess_fraction = ess(weights)
            if self.log:
                self.logger.debug("    sampling efficiency: %.3f%%" %
                                  (ess_fraction * 100))
                self.logger.debug("    obtained %.0f new effective samples" %
                                  (ess_fraction * len(weights)))

            samples, weights = self._collect_samples(sampler,
                                                     all=True,
                                                     mixes=mixes)
            ess_fraction = ess(weights)
            Ndone = ess_fraction * len(weights)

            result = self._update_results(samples, weights)
            if Ndone >= min_ess:
                if self.log:
                    self.logger.info(
                        "Status: Have %d total effective samples, done." %
                        Ndone)
                yield result
                break
            elif self.ncall > max_ncalls:
                if self.log:
                    self.logger.info(
                        "Status: Have %d total effective samples, reached max number of calls."
                        % Ndone)
                yield result
                break
            else:
                N = int(1.4 * min(max_ncalls - self.ncall, N))
                if self.log:
                    self.logger.info(
                        "Status: Have %d total effective samples, sampling %d next."
                        % (Ndone, N))
                yield result
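
run_iter is a generator: it yields the current result dict (as assembled by _update_results in Example #1) after each improvement loop, so the integration can be monitored while it runs. A usage sketch, with a hypothetical sampler instance of the class defining run_iter:

for result in sampler.run_iter(num_gauss_samples=400, min_ess=400):
    print(result['logz'], '+/-', result['logzerr'])
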
Example #7
chain_groups = len(long_patches) // 5  # components per group = 5

# for hierarchical clustering (and VBMerge) form "short_patches"
short_patches = pypmc.tools.patch_data(stacked_data, L=100)

# run hierarchical clustering
hc = pypmc.mix_adapt.hierarchical.Hierarchical(short_patches, long_patches, verbose=True)
print "running HC..."
hc.run()
hcmix = hc.g
hc_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, hcmix)
hc_sampler.run(N_perp_ess, trace_sort=True)
hc_weighted_samples = hc_sampler.history[-1]
hc_weights = hc_weighted_samples[:, 0]
hc_perp = perp(hc_weights)
hc_ess = ess(hc_weights)
components_hc = len(hcmix)
print "HC done"


# importance sampling main loop

sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, hcmix)
previous_perp = -np.inf
converge_step = None

generating_components = []
perplexities = [0.]  # initial 0 so the first convergence check below passes

for i in range(25):
    print('step', i)

    # run N_c steps per component that still carries nonzero weight,
    # recording which component generated each sample
    generating_components.append(sampler.run(N_c * (sampler.proposal.weights != 0).sum(), trace_sort=True))
    # get the weighted samples that have just been generated
    weighted_samples = sampler.history[-1]
    perplexities.append(perp(weighted_samples[:,0]))
    if (perplexities[i+1] - perplexities[i]) / perplexities[i+1] < .05:
        statusfile.write('PMC converged in step ' + str(i) + '\n')
        break
    # update the proposal using PMC
    pypmc.mix_adapt.pmc.gaussian_pmc(weighted_samples[:,1:], sampler.proposal, weights=weighted_samples[:,0],
                                     latent=generating_components[-1], rb=True, mincount=20, copy=False)
    # plot
    plt.figure()
    plt.title('proposal after PMC update ' + str(i))
    plot_mixture(sampler.proposal, 0,1, cutoff=.01)
    plotfile.savefig()
    statusfile.write('have %i live components in PMC step %i\n' % ((sampler.proposal.weights != 0).sum(), i))

statusfile.write('Perplexities:\n')
for p in perplexities[1:]:  # [1:] skips the initial 0; renamed to avoid shadowing perp()
    statusfile.write('%s\n' % p)

statusfile.write('Effective sample sizes:\n')
for ws in sampler.history:
    statusfile.write('%s\n' % ess(ws[:, 0]))

statusfile.close()
plotfile.close()