Example #1
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mcmc_data)
chain_groups = len(long_patches) // 15  # components per group = 15

# form first proposal with Variational Bayes

# run variational bayes with samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print('running VB1...')
vb.run(1000, abs_tol=1e-5, rel_tol=1e-10, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()
# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=False)
vb_weighted_samples = vb_sampler.history[-1]
vb_weights = vb_weighted_samples[:, 0]
vb_perp = perp(vb_weights)
vb_ess = ess(vb_weights)
components_vb = len(vbmix)
print('VB1 done')


print('running VB2...')
prior_for_vb2 = vb.posterior2prior()
prior_for_vb2.pop('alpha0')
vb2 = pypmc.mix_adapt.variational.GaussianInference(vb_weighted_samples[:vb2_N, 1:], weights=vb_weights[:vb2_N],
                                                    initial_guess=vbmix, **prior_for_vb2)
vb2.run(1000, abs_tol=1e-5, rel_tol=1e-10, verbose=True)
vb2mix = vb2.make_mixture()
vb2_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vb2mix)
vb2_sampler.run(N_perp_ess, trace_sort=False)
vb2_weighted_samples = vb2_sampler.history[-1]
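The snippets on this page assume a few names defined earlier in each script. Below is a minimal sketch of that setup: `perp` and `ess` are available as `pypmc.tools.convergence.perp` / `.ess`, while `log_target`, `mcmc_data`, `N_perp_ess`, and `vb2_N` are hypothetical stand-ins for the scripts' own definitions.

import numpy as np
import pypmc
from pypmc.tools.convergence import perp, ess

# stand-in target: log of an unnormalized bivariate Gaussian
def log_target(x):
    return -0.5 * np.dot(x, x)

# mcmc_data   -- list with one (N, dim) sample array per Markov chain
# stacked_data = np.vstack(mcmc_data)
# N_perp_ess  -- number of importance samples for the perp/ess estimates
# vb2_N       -- number of weighted samples fed into the second VB run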
Example #2
# ***********************************************************************
# ****************** nothing below should be changed! *******************
# ***********************************************************************


# use importance sampling to calculate perplexity and effective sample size

# define an ImportanceSampler object using ``final_proposal``
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, final_proposal)

# run N_perp_ess steps
sampler.run(N_perp_ess, trace_sort=True)

# get the weights from the samples that have just been generated
weighted_samples = sampler.history[-1]
weights = weighted_samples[:, 0]

# calculate perplexity and ess (use a new name so the ess() helper is not rebound)
perplexity = perp(weights)
eff_sample_size = ess(weights)

# save perplexity and ess
params.update([('perplexity', perplexity), ('ess', eff_sample_size)])

# dump results
save_final_proposal(final_proposal, params)
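# For reference, the standard normalized definitions behind perp() and ess();
# a sketch of the math, not necessarily pypmc's exact implementation:
def perplexity_of(weights):
    w = np.asarray(weights, dtype=float) / np.sum(weights)   # normalize
    entropy = -np.sum(w[w > 0] * np.log(w[w > 0]))
    return np.exp(entropy) / len(w)      # 1.0 <=> perfectly uniform weights

def ess_of(weights):
    w = np.asarray(weights, dtype=float) / np.sum(weights)   # normalize
    return 1.0 / (len(w) * np.sum(w ** 2))   # fraction of effective samples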
Example #3
chain_groups = len(long_patches) // 15  # components per group = 15


# form first proposal with PMC
print("running PMC")
pmcmix = pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], long_patches, copy=True)
pmcmix.prune(0.5 / len(long_patches))
# the first update happened above; run the remaining 999 of the 1000 updates
for i in range(1000 - 1):
    print(i)
    pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], pmcmix, copy=False)
    pmcmix.prune(0.5 / len(long_patches))
pmc_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, pmcmix)
pmc_sampler.run(N_perp_ess, trace_sort=True)
pmc_weighted_samples = pmc_sampler.history[-1]
pmc_weights = pmc_weighted_samples[:, 0]
pmc_perp = perp(pmc_weights)
pmc_ess = ess(pmc_weights)
components_pmc = len(pmcmix)
print "PMC done"


# form first proposal with Variational Bayes

# run variational bayes with samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print "running VB..."
vb.run(1000, abs_tol=1e-5, rel_tol=1e-10, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()
# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=True)
Example #4
stacked_data = np.vstack(mcmc_data)

print('Markov Chains done')


# form the "long_patches"
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mcmc_data)
chain_groups = len(long_patches) // 15  # components per group = 15


# form first proposal with Variational Bayes

# run variational bayes with samples --> use the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print('running VB...')
# note the looser rel_tol here -- this run feeds the 'loose convergence' output file below
vb.run(1000, abs_tol=1e-5, rel_tol=1e-5, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()
# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=True)
vb_weighted_samples = vb_sampler.history[-1]
vb_weights = vb_weighted_samples[:, 0]
vb_perp = perp(vb_weights)
vb_ess = ess(vb_weights)
components_vb = len(vbmix)
print('VB done')

# append this run's results to the output file
with open('calculate_perp_ess_VB_loose_convergence.txt', 'a') as outfile:
    outfile.write('\n' + str(chain_groups) + ' ' + str(components_vb) + ' ' + str(vb_perp) + ' ' + str(vb_ess))
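# Each run appends one whitespace-separated line to the file:
#     <chain_groups> <components_vb> <vb_perp> <vb_ess>
# so the accumulated results can later be loaded back with, e.g.,
# np.loadtxt('calculate_perp_ess_VB_loose_convergence.txt')
# (a sketch, not part of the original script).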
Example #5
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mcmc_data, K_g=5)
chain_groups = len(long_patches) // 5  # components per group = 5

# for hierarchical clustering (and VBMerge) form "short_patches"
short_patches = pypmc.tools.patch_data(stacked_data, L=100)

# run hierarchical clustering
hc = pypmc.mix_adapt.hierarchical.Hierarchical(short_patches, long_patches, verbose=True)
print "running HC..."
hc.run()
hcmix = hc.g
hc_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, hcmix)
hc_sampler.run(N_perp_ess, trace_sort=True)
hc_weighted_samples = hc_sampler.history[-1]
hc_weights = hc_weighted_samples[:, 0]
hc_perp = perp(hc_weights)
hc_ess = ess(hc_weights)
components_hc = len(hcmix)
print "HC done"


# importance sampling main loop

sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, hcmix)
previous_perp = -np.inf
converge_step = None

for i in range(25):
    print "step", i

    # run len(sampler.proposal) * N_c steps
Example #6
# ---------------------- continue with importance sampling ----------------------

# define an ImportanceSampler object using ``reduced_proposal``
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, reduced_proposal)

generating_components = []
perplexities = [0.]

for i in range(10):
    print "step", i
    # run 10,000 steps
    generating_components.append(sampler.run(N_c * (sampler.proposal.weights!=0).sum(), trace_sort=True))
    # get the weighted samples that have just been generated
    weighted_samples = sampler.history[-1]
    perplexities.append(perp(weighted_samples[:, 0]))
    # declare convergence once the relative perplexity gain drops below 5%
    if (perplexities[i + 1] - perplexities[i]) / perplexities[i + 1] < 0.05:
        statusfile.write('PMC converged in step ' + str(i) + '\n')
        break
    # update the proposal using PMC
    pypmc.mix_adapt.pmc.gaussian_pmc(weighted_samples[:, 1:], sampler.proposal, weights=weighted_samples[:, 0],
                                     latent=generating_components[-1], rb=True, mincount=20, copy=False)
    # plot
    plt.figure()
    plt.title('proposal after PMC update ' + str(i))
    plot_mixture(sampler.proposal, 0, 1, cutoff=0.01)
    plotfile.savefig()
    statusfile.write('have %i live components in PMC step %i\n' % ((sampler.proposal.weights != 0).sum(), i))

statusfile.write('Perplexities:\n')
for p in perplexities[1:]:  # skip the initial 0. (avoid rebinding the perp() helper)
    statusfile.write(str(p) + '\n')