import numpy
from pypmc.density.mixture import create_gaussian_mixture
from pypmc.mix_adapt.variational import GaussianInference


def _make_mixture_for_column(colname, col, verbose, Ngauss_init, nmin_multigauss, VB_iter):
    col = col[numpy.isfinite(col)]
    std = col.std()
    if len(col) == 0 or not (std > 0):
        # no useful samples
        if verbose > 0:
            print(' column %s: no data' % colname)
        return None
    elif len(col) < nmin_multigauss or VB_iter == 0:
        if verbose > 0:
            print(' column %s: %s +- %s' % (colname, col.mean(), std))
        # single-component fallback; the covariance entry is the variance (std**2)
        return create_gaussian_mixture(
            numpy.array([[col.mean()]], dtype=float),
            [numpy.array([[std**2]], dtype=float)])
    else:
        means, covs = _make_single_mixture(col, Ngauss_init)
        mix = create_gaussian_mixture(means, covs)
        if verbose > 0:
            print(' column %s: running VB...' % colname)
        vb = GaussianInference(col.reshape(-1, 1), initial_guess=mix,
                               W0=numpy.eye(1) * 1e10)
        vb_prune = 0.5 * len(vb.data) / vb.K
        vb.run(VB_iter, rel_tol=1e-8, abs_tol=1e-5, prune=vb_prune,
               verbose=verbose > 1)
        mix = vb.make_mixture()
    return mix
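# Usage sketch (illustration, not part of the original module): with fewer than
# `nmin_multigauss` finite samples, or VB_iter == 0, the function takes the
# fallback branch and returns a one-component Gaussian around the column mean.
# The column values below are synthetic.
_example_col = numpy.random.normal(loc=1.0, scale=0.5, size=50)
_example_mix = _make_mixture_for_column('x1', _example_col, verbose=1,
                                        Ngauss_init=3, nmin_multigauss=100,
                                        VB_iter=0)
# _example_mix is a pypmc mixture with a single component near mean 1.0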
import numpy as np
import pypmc.mix_adapt.r_value
from pypmc.density.mixture import create_gaussian_mixture


def make_long_patch_gaussian_mixture(data, K_g=15, critical_r=2.):
    '''Use samples from Markov chains to form a Gaussian mixture (to be used
    as initial guess for VB). This is done using "long patches" as in
    [Allen Fred].

    :param data:
        Iterable of vector-like arrays; the individual items are interpreted
        as points from an individual chain.
    :param K_g:
        Integer; the number of components per chain group.
    :param critical_r:
        Float; the maximum R value a chain group may have.
    '''
    def append_components(means, covs, data, partition):
        start = 0
        for len_subdata in partition:
            subdata = data[start:start + len_subdata]
            means.append(np.mean(subdata, axis=0))
            covs.append(np.cov(subdata, rowvar=0))
            start += len_subdata

    chain_groups = pypmc.mix_adapt.r_value.r_group(
        [np.mean(chain_values[:], axis=0) for chain_values in data],
        [np.cov(chain_values[:], rowvar=0) for chain_values in data],
        len(data[0]), critical_r)
    print('found %i chain groups\n' % len(chain_groups))

    long_patches_means = []
    long_patches_covs = []
    for group in chain_groups:
        # we want K_g components from k_g = len(group) chains
        k_g = len(group)
        if K_g >= k_g:
            # find minimal lexicographic integer partition
            n = partition(K_g, k_g)
            for i, chain_index in enumerate(group):
                # need to partition this chain into n[i] parts
                data_full_chain = data[chain_index]
                # find minimal lexicographic integer partition of chain_length into n[i]
                this_patch_lengths = partition(len(data_full_chain), n[i])
                append_components(long_patches_means, long_patches_covs,
                                  data_full_chain, this_patch_lengths)
        else:
            # form one long chain and set k_g = 1
            k_g = 1
            data_full_chain = np.vstack([data[i] for i in group])
            # need to partition into K_g parts --> minimal lexicographic integer partition
            this_patch_lengths = partition(len(data_full_chain), K_g)
            append_components(long_patches_means, long_patches_covs,
                              data_full_chain, this_patch_lengths)
    return create_gaussian_mixture(long_patches_means, long_patches_covs)
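# Usage sketch (illustration only): four synthetic 2-d "chains" drawn from the
# same Gaussian, grouped by their R value and patched into a mixture. The
# integer-partition helper `partition` must be defined elsewhere in this module.
_chains = [np.random.multivariate_normal(np.zeros(2), np.eye(2), size=500)
           for _ in range(4)]
_init_mix = make_long_patch_gaussian_mixture(_chains, K_g=15, critical_r=2.)
print('initial guess has %d components' % len(_init_mix.components))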
import pickle
import uuid
from time import time

import numpy as np
from numpy.random import seed
from pypmc.density.mixture import create_gaussian_mixture
# project-local helpers (BasicGen, GaussMixtureDensity, DictionaryGenerator,
# WeightEstimator, KLaggDensity, IntegrandL2Density, IntegrandKLDensity,
# l2_norm, kl_norm, mle_bic, KdeCV, type_simu_to_str) and the module-level
# constants are assumed to be in scope.


def simu(N, K, dim, gof_test=True, pc_select=True, write_results=True, verbose=False):
    # reset the random generator, useful for multiprocess
    seed()
    try:
        if verbose:
            print("init N=", N, " dim=", dim)
        # Some initialization
        #sc = StandardScaler()
        max_pca_comp = dim // 2 + 1

        # We generate the Gaussian mixture
        #gg = GaussianMixtureGen(dim, weights)
        #centers_star, cov_star = gg.get_params()
        gg = BasicGen(dim)
        centers_star, cov_star = gg.get_params()
        #X_ = gg.sample(N)
        X_ids = gg.sample(N, with_ids=True)
        ids = X_ids[:, -1]
        X_ = X_ids[:, :-1]
        K = len(set(ids))
        weights = 1. / K * np.ones(K)

        # We normalize the data for the PCA in the KL aggregation
        #X = sc.fit_transform(X_)
        X = X_

        # We generate the target density f_star from the components
        f_star = GaussMixtureDensity(weights, centers_star, cov_star)
        f_star_sampling = create_gaussian_mixture(centers_star, cov_star, weights)

        ######################
        # KL-AGGREG. ALGORITHM
        ######################
        if verbose:
            print("starting KL-aggreg")
        time_kl_aggreg_start = time()
        dg = DictionaryGenerator(kmeans_k=KMEANS_K, max_pca_comp=max_pca_comp,
                                 subspace_cluster_dim=2, pc_select=pc_select)
        X_train_dict_gen = X[:N // 2]
        X_train_kl_aggreg = X[N // 2:]
        if gof_test:
            dg.fit(X_train_dict_gen)
            densities_dict = dg.simplify_gof()
        else:
            densities_dict = dg.fit_transform(X_train_dict_gen)
        cl = WeightEstimator(densities_dict=densities_dict)
        cl.fit(X_train_kl_aggreg)
        time_kl_aggreg_stop = time()
        kl_aggreg_weights = cl.pi_final
        kl_aggreg_density = KLaggDensity(kl_aggreg_weights, densities_dict)

        # Compute L2 loss
        kl_aggreg_integrand_L2_loss = IntegrandL2Density(f_star.pdf, kl_aggreg_density.pdf)
        kl_aggreg_l2 = l2_norm(kl_aggreg_integrand_L2_loss.pdf, f_star_sampling,
                               sample_size=SAMPLE_SIZE_NORM_MC,
                               hypercube_size=HYPERCUBE_SIZE)
        # Compute KL loss
        kl_aggreg_integrand_KL_loss = IntegrandKLDensity(f_star.pdf, kl_aggreg_density.pdf)
        kl_aggreg_kl = kl_norm(kl_aggreg_integrand_KL_loss.pdf, f_star_sampling,
                               sample_size=SAMPLE_SIZE_NORM_MC,
                               hypercube_size=HYPERCUBE_SIZE)
        if verbose:
            print("KL-aggreg done")
            print("KL-loss", kl_aggreg_kl)
            print("L2-loss", kl_aggreg_l2)

        ##################
        # EM-BIC ALGORITHM
        ##################
        if verbose:
            print("starting EM-BIC")
        time_em_start = time()
        _, em_model = mle_bic(X, MAX_EM_BIC_K)
        time_em_stop = time()
        em_density = GaussMixtureDensity(em_model.weights_, em_model.means_,
                                         em_model.covariances_)
        # Compute L2 loss
        em_integrand_L2_loss = IntegrandL2Density(f_star.pdf, em_density.pdf)
        em_l2 = l2_norm(em_integrand_L2_loss.pdf, f_star_sampling,
                        sample_size=SAMPLE_SIZE_NORM_MC,
                        hypercube_size=HYPERCUBE_SIZE)
        # Compute KL loss
        em_integrand_KL_loss = IntegrandKLDensity(f_star.pdf, em_density.pdf)
        em_kl = kl_norm(em_integrand_KL_loss.pdf, f_star_sampling,
                        sample_size=SAMPLE_SIZE_NORM_MC,
                        hypercube_size=HYPERCUBE_SIZE)
        if verbose:
            print("EM-BIC done")
            print("KL-loss", em_kl)
            print("L2-loss", em_l2)

        ##################
        # KDE-CV ALGORITHM
        ##################
        if verbose:
            print("starting KDE-CV")
        kde = KdeCV(n_jobs=1, cv=10, bw=np.linspace(0.01, 1.0, 20))
        time_kde_start = time()
        kde.fit(X)
        time_kde_stop = time()
        kde_integrand_KL_loss = IntegrandKLDensity(f_star.pdf, kde.pdf)
        kde_kl = kl_norm(kde_integrand_KL_loss.pdf, f_star_sampling,
                         sample_size=SAMPLE_SIZE_NORM_MC,
                         hypercube_size=HYPERCUBE_SIZE)
        kde_integrand_L2_loss = IntegrandL2Density(f_star.pdf, kde.pdf)
        kde_l2 = l2_norm(kde_integrand_L2_loss.pdf, f_star_sampling,
                         sample_size=SAMPLE_SIZE_NORM_MC,
                         hypercube_size=HYPERCUBE_SIZE)

        # Compute times
        kl_aggreg_time = time_kl_aggreg_stop - time_kl_aggreg_start
        em_bic_time = time_em_stop - time_em_start
        kde_cv_time = time_kde_stop - time_kde_start
        if verbose:
            print("KDE-CV done")
            print("KL-loss", kde_kl)
            print("L2-loss", kde_l2)

        # Writing results
        results = {"K": K, "p": dim, "N": N,
                   "MLE_l2": kl_aggreg_l2, "MLE_KL": kl_aggreg_kl,
                   "MLE_time": kl_aggreg_time,
                   "EM_l2": em_l2, "EM_KL": em_kl, "EM_time": em_bic_time,
                   "KdeCV_l2": kde_l2, "KdeCV_KL": kde_kl,
                   "KdeCV_time": kde_cv_time}
        if write_results:
            print("OK, writing results")
            filename = (FOLDER + "res_" + "K" + str(K) + "p" + str(dim)
                        + "N" + str(N) + "_" + type_simu_to_str(gof_test, pc_select)
                        + "_" + str(uuid.uuid4()))
            with open(filename, "wb") as f:
                pickle.dump(results, f)
        else:
            # we print the results, for testing
            print(results)
        return 1
    except Exception as e:
        print(e)
        return 0
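# Usage sketch (illustration only; the project-local helpers and constants
# noted above must exist before calling). `K` is recomputed inside from the
# generated component ids, and write_results=False prints instead of pickling:
#
#   ok = simu(N=1000, K=5, dim=4, gof_test=True, pc_select=True,
#             write_results=False, verbose=True)
#   # ok == 1 on success, 0 if any step raised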
def sample(self, N):
    mixture = create_gaussian_mixture(self.centers, self.cov, self.weights)
    return mixture.propose(N)
    for j in range(remainder):
        this_patch_lengths[j] += 1
    start = 0
    for next_len in this_patch_lengths:
        this_data = data_full_chain[start:start + next_len]
        long_patches_means.append(np.mean(this_data, axis=0))
        if vb_initialization == 'long_patches_rescaled_cov':
            long_patches_covs.append(np.cov(this_data, rowvar=0) * cov_rescale_factor)
        elif vb_initialization == 'long_patches':
            long_patches_covs.append(np.cov(this_data, rowvar=0))
        else:
            raise RuntimeError('unknown `vb_initialization` "%s"' % vb_initialization)
        start += next_len
    mcmcmix = create_gaussian_mixture(long_patches_means, long_patches_covs)
    try:
        vb = pypmc.mix_adapt.variational.GaussianInference(
            data, initial_guess=mcmcmix,
            nu=(np.zeros(len(mcmcmix)) + 100.))
    except ValueError:
        vb = pypmc.mix_adapt.variational.GaussianInference(
            data, initial_guess=mcmcmix)
    # set vb_prune: components with fewer effective samples than half the
    # average number per component are pruned
    vb_prune = .5 * len(data) / len(mcmcmix)
    params.update((('vb_prune', vb_prune),))

# else --> unrecognized initialization scheme
else:
    raise ValueError('I don\'t know what you mean by `vb_initialization` = "%s"'
                     % vb_initialization)

# run the variational Bayes
try:
    vb_converged = vb.run(N_max_vb, verbose=True, rel_tol=vb_rel_tol,
                          abs_tol=vb_abs_tol, prune=vb_prune)
except NameError:
def run_iter(
        self,
        num_gauss_samples=400,
        max_ncalls=100000,
        min_ess=400,
        max_improvement_loops=4,
        heavytail_laplaceapprox=True,
        verbose=True,
):
    """
    Iterative version of run(). See documentation there.
    Returns current samples on each iteration.
    """
    paramnames = self.paramnames
    loglike = self.loglike
    transform = self.transform
    ndim = len(paramnames)
    optu, cov, invcov = self.optu, self.cov, self.invcov
    # for numerical stability, use 1e260, so that we can go down by 1e-100,
    # but up by 1e600
    self.Loffset = self.optL  #+ 600

    # first iteration: create a single gaussian and importance-sample
    if self.log:
        self.logger.info("Initiating gaussian importance sampler")

    def log_target(u):
        """ log-posterior to sample from """
        if (u > 1).any() or (u < 0).any():
            return -np.inf
        p = transform(u)
        L = loglike(p)
        return L - self.Loffset

    if not heavytail_laplaceapprox:
        initial_proposal = Gauss(optu, cov)
    else:
        # make a few gaussians, in case the fit errors were too narrow
        means, covs, weights = _make_initial_proposal(optu, cov)
        initial_proposal = create_gaussian_mixture(means, covs, weights)

    mixes = [initial_proposal]

    N = num_gauss_samples
    Nhere = N // self.mpi_size
    if self.mpi_size > 1:
        SequentialIS = ImportanceSampler
        from pypmc.tools.parallel_sampler import MPISampler
        sampler = MPISampler(SequentialIS, target=log_target,
                             proposal=initial_proposal, prealloc=Nhere)
    else:
        sampler = ImportanceSampler(target=log_target,
                                    proposal=initial_proposal, prealloc=Nhere)

    if self.log:
        self.logger.info(" sampling %d ..." % N)
    np.seterr(over="warn")
    sampler.run(Nhere)
    self.ncall += Nhere * self.mpi_size

    samples, weights = self._collect_samples(sampler)
    assert weights.sum() > 0, 'All samples have weight zero.'

    vbmix = None
    for it in range(max_improvement_loops):
        ess_fraction = ess(weights)
        if self.log:
            self.logger.info(" sampling efficiency: %.3f%%" % (ess_fraction * 100))

        if it % 3 == 0:
            if self.log:
                self.logger.info("Optimizing proposal (from scratch) ...")
            mix = _make_proposal(samples, weights, optu, cov, invcov)
            vb = GaussianInference(samples, weights=weights,
                                   initial_guess=mix, W0=np.eye(ndim) * 1e10)
            vb_prune = 0.5 * len(vb.data) / vb.K
        else:
            if self.log:
                self.logger.info("Optimizing proposal (from previous) ...")
            prior_for_proposal_update = vb.posterior2prior()
            prior_for_proposal_update.pop('alpha0')
            vb = GaussianInference(samples, initial_guess=vbmix,
                                   weights=weights,
                                   **prior_for_proposal_update)

        if self.log:
            self.logger.info(' running variational Bayes ...')
        vb.run(1000, rel_tol=1e-8, abs_tol=1e-5, prune=vb_prune, verbose=False)
        vbmix = vb.make_mixture()
        if self.log:
            self.logger.info(' reduced from %d to %d components'
                             % (len(mix.components), len(vbmix.components)))

        sampler.proposal = vbmix

        if self.log:
            self.logger.info("Importance sampling %d ..." % N)
        sampler.run(N // self.mpi_size)
        self.ncall += (N // self.mpi_size) * self.mpi_size
        mixes.append(vbmix)

        samples, weights = self._collect_samples(sampler)
        ess_fraction = ess(weights)
        if self.log:
            self.logger.debug(" sampling efficiency: %.3f%%" % (ess_fraction * 100))
            self.logger.debug(" obtained %.0f new effective samples"
                              % (ess_fraction * len(weights)))

        samples, weights = self._collect_samples(sampler, all=True, mixes=mixes)
        ess_fraction = ess(weights)
        Ndone = ess_fraction * len(weights)

        result = self._update_results(samples, weights)
        if Ndone >= min_ess:
            if self.log:
                self.logger.info(
                    "Status: Have %d total effective samples, done."
                    % Ndone)
            yield result
            break
        elif self.ncall > max_ncalls:
            if self.log:
                self.logger.info(
                    "Status: Have %d total effective samples, reached max number of calls."
                    % Ndone)
            yield result
            break
        else:
            N = int(1.4 * min(max_ncalls - self.ncall, N))
            if self.log:
                self.logger.info(
                    "Status: Have %d total effective samples, sampling %d next."
                    % (Ndone, N))
            yield result
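# Usage sketch (illustration only): run_iter is a generator, so intermediate
# results can be inspected after every importance-sampling round. `sampler`
# here stands for whatever object this method is defined on:
#
#   for result in sampler.run_iter(num_gauss_samples=400, min_ess=400):
#       print('intermediate result:', result)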
import numpy as np
import scipy.spatial.distance
from pypmc.density.mixture import create_gaussian_mixture


def _make_proposal(samples, weights, optu, cov, invcov):
    # split samples into 3 equally large groups, by L
    w1, w2 = np.percentile(weights[weights > 0], [33, 66])
    means = [optu]
    covs = [cov]
    chunk_weights = [1]
    # for each group (top: L1 < L, mid: L1 > L > L2, bottom: L < L2)
    cov_guess = cov
    for mask in (weights >= w1,
                 ~np.logical_or(weights >= w2, weights <= w1),
                 weights <= w2):
        mask = np.logical_and(mask, weights > 0)
        if not mask.any():
            continue
        # assume H as distance metric
        # find most distant point from ML (u)
        dists = scipy.spatial.distance.cdist(samples[mask, :], [optu],
                                             'mahalanobis', VI=invcov).flatten()
        # maximum size of clusters:
        handled = np.zeros(len(dists), dtype=bool)
        # repeat recursively until no points left
        while not handled.all():
            samples_todo = samples[mask, :][~handled, :]
            # find most distant point, which is used as the center
            i = dists[~handled].argmax()
            # add all points within distance until peak is included
            d = dists[~handled][i]
            # but include at most a distance of maxdistance
            dists_todo = scipy.spatial.distance.cdist(
                samples_todo, [samples_todo[i, :]],
                'mahalanobis', VI=invcov).flatten()
            selected = dists_todo <= d
            cluster = samples_todo[selected]
            #print("  accreted %d (of %d to do)" % (len(cluster), (~handled).sum()), 'from', samples_todo[i, :])
            handled[~handled] = selected
            if len(cluster) < cluster.shape[1]:
                continue
            # print(np.diag(np.var(cluster, axis=0)))
            # cov_guess = np.diag(np.var(cluster, axis=0))
            try:
                cov_local = np.cov(cluster, rowvar=0)
                # check that it is positive-definite
                np.linalg.cholesky(cov_local)
                if not np.all(np.linalg.eigvals(cov_local) > 0):
                    continue
            except np.linalg.LinAlgError:
                cov_local = cov_guess
                # reject, too few points in cluster
                continue
            assert np.isfinite(cluster).all(), cluster[~np.isfinite(cluster)]
            assert np.isfinite(cov_local).all(), (
                cov_local, cov_local[np.isfinite(cov_local)])
            means.append(np.mean(cluster, axis=0))
            covs.append(cov_local)
            chunk_weights.append(1)

    chunk_weights = np.asarray(chunk_weights) / np.sum(chunk_weights)
    mix = create_gaussian_mixture(means, covs, weights=chunk_weights)
    return mix
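# Usage sketch (illustration only): feed _make_proposal synthetic importance
# samples; in the real code these come from the sampler in run_iter above.
_rng = np.random.RandomState(1)
_samples = _rng.normal(0.5, 0.1, size=(1000, 2))
_weights = _rng.exponential(size=1000)
_optu = _samples[_weights.argmax()]
_cov = np.cov(_samples, rowvar=0)
_mix = _make_proposal(_samples, _weights, _optu, _cov, np.linalg.inv(_cov))
print('proposal has %d components' % len(_mix.components))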
weights = np.ones(K) / K
means = []
covariances = []
for j in range(K):
    mean = np.zeros(ndim) + 0.5
    mean[0] = j * 1. / K
    means.append(mean)
    sigma = 10**(mean[0] * 20 - 10)
    sigma = max(sigma, 10**-difficulty)
    sigma = min(sigma, 3)
    cov = np.eye(ndim) * sigma
    cov[0, 0] = (1. / K)
    #print(mean, cov)
    covariances.append(cov)

mix = create_gaussian_mixture(means, covariances, weights)

N = 40000
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(
    log_target, mix, prealloc=N)
print('importance sampling ...')
#print('  drawing samples...')
#samples = mix.propose(N, numpy.random)
#print('  computing likelihood ...')
#weights_target = loglikelihood(samples)
#print('  computing weights...')
#weights_proposal = numpy.array([mix.evaluate(sample) for sample in samples])
#weights = exp(weights_target - weights_proposal)
sampler.run(N)
print('importance sampling done')
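# Post-run sketch (illustration only): pull the weighted samples out of the
# sampler's history and estimate the effective sample size; the [:] access
# follows pypmc's History interface.
is_samples = sampler.samples[:]
is_weights = sampler.weights[:][:, 0]
w = is_weights / is_weights.sum()
print('effective sample size: %.1f of %d' % (1. / np.sum(w**2), N))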
# need to partition into K_g parts --> minimal lexicographic integer partition
this_patch_lengths = [chain_length // K_g for j in range(K_g)]
remainder = chain_length % K_g
for j in range(remainder):
    this_patch_lengths[j] += 1
start = 0
for next_len in this_patch_lengths:
    this_data = data_full_chain[start:start + next_len]
    long_patches_means.append(np.mean(this_data, axis=0))
    long_patches_covs.append(np.cov(this_data, rowvar=0))
    start += next_len

hierarchical_init = create_gaussian_mixture(long_patches_means, long_patches_covs)

plt.figure()
plt.title('hierarchical init')
plot_mixture(hierarchical_init, 0, 1)
plotfile.savefig()

# ----------------------- hierarchical clustering --------------------------------------
hc = pypmc.mix_adapt.hierarchical.Hierarchical(mcmcmix, hierarchical_init, verbose=True)
hc_converged = hc.run(kill=kill_in_hc)
if hc_converged:
    statusfile.write('hierarchical clustering converged in step %i\n' % hc_converged)
else:
    statusfile.write('hierarchical clustering did not converge\n')

reduced_proposal = hc.g
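# Follow-up sketch in the same plotting style as above (illustration only):
# visualize the reduced proposal produced by the hierarchical clustering next
# to the 'hierarchical init' figure, using the same two axes.
plt.figure()
plt.title('reduced proposal (%d components)' % len(reduced_proposal.components))
plot_mixture(reduced_proposal, 0, 1)
plotfile.savefig()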