def gibbs_resampling_EM(self, iter_n=1):
    from sklearn.mixture import BayesianGaussianMixture
    from sklearn.cluster import KMeans

    self.itr = 0
    Np = len(self.Rot)
    for _ in range(iter_n):
        # Model the particle (x, y) positions with a Bayesian GMM and draw a
        # fresh position for each particle; jitter the heading by a small
        # gaussian plus an occasional 90/180/270 degree flip.
        EM = BayesianGaussianMixture(n_components=10)
        EM.fit(self.pf_debug[:, 0:2])
        for i in range(Np):
            sample, _ = EM.sample()
            xy = np.squeeze(sample)
            self.Rot[i].theta += 0.5 * np.random.randn() \
                + 90.0 * np.random.choice(4, p=[0.8, 0.05, 0.1, 0.05])
            self.Rot[i].x = xy[0]
            self.Rot[i].y = xy[1]

        self.likelihood_PF()
        W = self.scores / np.sum(self.scores)  # normalized scores for resampling
        Np = len(self.Rot)
        index = np.random.choice(a=Np, size=Np, p=W)  # resample by score
        Rot_arr = self.Rot  # temporary array for the new samples

        # Cluster the particle headings and redraw each heading from a
        # randomly chosen cluster center plus a small perturbation; the
        # score-based index above is overwritten by this uniform choice.
        kmeans = KMeans(n_clusters=4, init='k-means++', max_iter=300,
                        n_init=10, random_state=0)
        kmeans.fit(self.pf_debug[:, 2].reshape((-1, 1)))
        index = np.random.choice(a=4, size=Np)
        for i, idx in enumerate(index):
            self.Rot[i].theta = np.squeeze(kmeans.cluster_centers_[idx]) \
                + 0.5 * np.random.randn()
            self.Rot[i].x += np.random.normal(0, 0.01)
            self.Rot[i].y += np.random.normal(0, 0.01)
    print('resample done')
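# Usage sketch (illustrative, not part of the original class): a standalone
# rehearsal of the two resampling ingredients used above, namely a
# BayesianGaussianMixture fit to particle (x, y) positions and a KMeans fit
# to particle headings. The synthetic particle array and the _demo_* name are
# hypothetical stand-ins for self.Rot / self.pf_debug.
def _demo_gibbs_resampling_em():
    import numpy as np
    from sklearn.mixture import BayesianGaussianMixture
    from sklearn.cluster import KMeans

    rng = np.random.default_rng(0)
    # synthetic particles: columns are x, y, theta
    particles = np.column_stack([
        rng.normal(0.0, 1.0, size=200),
        rng.normal(0.0, 1.0, size=200),
        rng.choice([0.0, 90.0, 180.0, 270.0], size=200),
    ])

    # redraw positions from a GMM fit to (x, y)
    em = BayesianGaussianMixture(n_components=10)
    em.fit(particles[:, 0:2])
    new_xy, _ = em.sample(len(particles))

    # redraw headings from KMeans cluster centers plus a small perturbation
    km = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=0)
    km.fit(particles[:, 2].reshape(-1, 1))
    idx = rng.integers(0, 4, size=len(particles))
    new_theta = km.cluster_centers_[idx, 0] + 0.5 * rng.standard_normal(len(particles))

    return np.column_stack([new_xy, new_theta])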
def kde_entropy_sklearn_gmm(points, n_est=None, n_components=None):
    """
    Use sklearn.neighbors.KernelDensity pdf to estimate entropy.

    Data is standardized before kde.

    Sample points drawn from gaussian mixture model from original points.

    Fails for bimodal and dirichlet, similar to statsmodels kde.
    """
    from sklearn.mixture import BayesianGaussianMixture as GMM
    n, d = points.shape

    # Default to the full set
    if n_est is None:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    if n_components is None:
        n_components = int(5 * sqrt(d))

    predictor = GMM(
        n_components=n_components,
        covariance_type='full',
        #verbose=True,
        max_iter=1000)
    predictor.fit(x)
    evaluation_points, _ = predictor.sample(n_est)

    logp = sklearn_log_density(x, evaluation_points=evaluation_points)
    H = -np.mean(logp)
    return H / LN2
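# Usage sketch (hypothetical): kde_entropy_sklearn_gmm relies on module-level
# helpers assumed to be in scope (permutation, sqrt, LN2, sklearn_log_density),
# so it is only called here, not reimplemented. For a d-dimensional standard
# normal the entropy is d/2 * log2(2*pi*e) bits, which gives a quick sanity
# check on the estimate.
def _demo_kde_entropy_sklearn_gmm():
    import numpy as np
    points = np.random.default_rng(1).standard_normal((4000, 2))
    h_bits = kde_entropy_sklearn_gmm(points, n_est=2000)
    expected = 0.5 * 2 * np.log2(2 * np.pi * np.e)  # ~4.09 bits for d=2
    print("estimated %.2f bits, expected %.2f bits" % (h_bits, expected))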
def gmm_entropy(points, n_est=None, n_components=None):
    #from sklearn.mixture import GaussianMixture as GMM
    from sklearn.mixture import BayesianGaussianMixture as GMM
    n, d = points.shape

    # Default to the full set
    if n_est is None:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    if n_components is None:
        n_components = int(5*sqrt(d))

    ## Standardization doesn't seem to help
    ## Note: sigma may be zero
    #x, mu, sigma = standardize(x)   # if standardized
    predictor = GMM(n_components=n_components, covariance_type='full',
                    #verbose=True,
                    max_iter=1000)
    predictor.fit(x)
    eval_x, _ = predictor.sample(n_est)
    weight_x = predictor.score_samples(eval_x)
    H = -np.mean(weight_x)
    #with np.errstate(divide='ignore'):
    #    H = H + np.sum(np.log(sigma))   # if standardized
    dH = 0.
    ## cross-check against own calcs
    #alt = GaussianMixture(predictor.weights_, mu=predictor.means_, sigma=predictor.covariances_)
    #print("alt", H, alt.entropy())
    #print(np.vstack((weight_x[:10], alt.logpdf(eval_x[:10]))).T)
    return H / LN2, dH / LN2
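# Background sketch (hypothetical): the estimate above is Monte Carlo
# integration of H = -E[log p(x)], drawing points from the fitted mixture and
# averaging the negative log density. A self-contained version of that
# identity using sklearn's plain GaussianMixture on a known 1-D gaussian:
def _demo_mc_entropy_identity():
    import numpy as np
    from sklearn.mixture import GaussianMixture

    rng = np.random.default_rng(2)
    data = rng.normal(0.0, 2.0, size=(5000, 1))  # N(0, 2^2), entropy ~3.05 bits

    gm = GaussianMixture(n_components=1, covariance_type='full').fit(data)
    draws, _ = gm.sample(5000)
    h_bits = -np.mean(gm.score_samples(draws)) / np.log(2)

    expected = 0.5 * np.log2(2 * np.pi * np.e * 2.0**2)
    print("estimated %.2f bits, expected %.2f bits" % (h_bits, expected))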
def wnn_entropy(points, k=None, weights=True, n_est=None, gmm=None):
    r"""
    Weighted Kozachenko-Leonenko nearest-neighbour entropy calculation.

    *k* is the number of neighbours to consider, with default $k=n^{1/3}$

    *n_est* is the number of points to use for estimating the entropy,
    with default $n_{\rm est} = n$

    *weights* is True for default weights, False for unweighted (using
    the distance to the kth neighbour only), or a vector of weights of
    length *k*.

    *gmm* is the number of gaussians to use to model the distribution using
    a gaussian mixture model.  Default is 0, and the points represent an
    empirical distribution.

    Returns entropy H in bits and its uncertainty.

    Berrett, T. B., Samworth, R.J., Yuan, M., 2016. Efficient multivariate
    entropy estimation via k-nearest neighbour distances.
    https://arxiv.org/abs/1606.00304
    """
    from sklearn.neighbors import NearestNeighbors
    n, d = points.shape

    # Default to the full set
    if n_est is None:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    # Default k based on n
    if k is None:
        # Private communication: cube root of n is a good choice for k
        # Personal observation: k should be much bigger than d
        k = max(int(n**(1/3)), 3*d)

    # If weights are given then use them (setting the appropriate k),
    # otherwise use the default weights.
    if isinstance(weights, bool):
        weights = _wnn_weights(k, d, weights)
    else:
        k = len(weights)
    #print("weights", weights, sum(weights))

    # select knn algorithm
    algorithm = 'auto'
    #algorithm = 'kd_tree'
    #algorithm = 'ball_tree'
    #algorithm = 'brute'

    n_components = 0 if gmm is None else gmm

    # H = 1/n sum_i=1^n sum_j=1^k w_j log E_{j,i}
    # E_{j,i} = e^-Psi(j) V_d (n-1) z_{j,i}^d = C z^d
    # logC = -Psi(j) + log(V_d) + log(n-1)
    # H = 1/n sum sum w_j logC + d/n sum sum w_j log(z)
    #   = sum w_j logC + d/n sum sum w_j log(z)
    #   = A + d/n B
    # H^2 = 1/n sum
    Psi = digamma(np.arange(1, k+1))
    logVd = d/2*log(pi) - gammaln(1 + d/2)
    logC = -Psi + logVd + log(n-1)

    # TODO: standardizing points doesn't work.
    # Standardize the data so that distances conform.  This is equivalent to
    # a u-substitution u = sigma x + mu, so the integral needs to be corrected
    # for dU = det(sigma) dx.  Since the standardization squishes the
    # dimensions independently, sigma is a diagonal matrix, with the
    # determinant equal to the product of the diagonal elements.
    #x, mu, sigma = standardize(x)  # Note: sigma may be zero
    #detDU = np.prod(sigma)
    detDU = 1.

    if n_components > 0:
        # Use Gaussian mixture to model the distribution
        from sklearn.mixture import GaussianMixture as GMM
        predictor = GMM(n_components=gmm, covariance_type='full')
        predictor.fit(x)
        eval_x, _ = predictor.sample(n_est)
        #weight_x = predictor.score_samples(eval_x)
        skip = 0
    else:
        # Empirical distribution
        # TODO: should we use the full draw for kNN and a subset for eval points?
        # Choose a subset for evaluating the entropy estimate, if desired
        #print(n_est, n)
        #eval_x = x if n_est >= n else x[permutation(n)[:n_est]]
        eval_x = x
        #weight_x = 1
        skip = 1

    tree = NearestNeighbors(algorithm=algorithm, n_neighbors=k+skip)
    tree.fit(x)
    dist, _ind = tree.kneighbors(eval_x, n_neighbors=k+skip,
                                 return_distance=True)
    # Remove first column.  Since test points are in x, the first column will
    # be a point from x with distance 0, and can be ignored.
    if skip:
        dist = dist[:, skip:]
    # Find log distances.  This can be problematic for MCMC runs where a
    # step is rejected, and therefore identical points are in the distribution.
    # Ignore them by replacing these points with nan and using nanmean.
    # TODO: need proper analysis of duplicated points in MCMC chain
    dist[dist == 0] = nan
    logdist = log(dist)
    H_unweighted = logC + d*np.nanmean(logdist, axis=0)
    H = np.dot(H_unweighted, weights)[0]
    Hsq_k = np.nanmean((logC[-1] + d*logdist[:, -1])**2)
    # TODO: abs shouldn't be needed?
    if Hsq_k < H**2:
        print("warning: avg(H^2) < avg(H)^2")
    dH = sqrt(abs(Hsq_k - H**2)/n_est)
    #print("unweighted", H_unweighted)
    #print("weighted", H, Hsq_k, H**2, dH, detDU, LN2)
    return H * detDU / LN2, dH * detDU / LN2
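# Usage sketch (hypothetical): wnn_entropy depends on module-level helpers
# assumed to be in scope (_wnn_weights, permutation, digamma, gammaln, log,
# nan, sqrt, pi, LN2), so it is only called here. A correlated gaussian draw
# has known entropy 0.5 * log2((2*pi*e)^d * det(Sigma)) bits to compare
# against.
def _demo_wnn_entropy():
    import numpy as np
    rng = np.random.default_rng(3)
    sigma = np.array([[1.0, 0.6], [0.6, 2.0]])
    points = rng.multivariate_normal([0.0, 0.0], sigma, size=5000)
    h_bits, dh_bits = wnn_entropy(points, n_est=2000)
    expected = 0.5 * np.log2((2 * np.pi * np.e)**2 * np.linalg.det(sigma))
    print("estimated %.2f +/- %.2f bits, expected %.2f bits"
          % (h_bits, dh_bits, expected))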
def wnn_entropy(points, k=None, weights=True, n_est=None, gmm=None):
    r"""
    Weighted Kozachenko-Leonenko nearest-neighbour entropy calculation.

    *k* is the number of neighbours to consider, with default $k=n^{1/3}$

    *n_est* is the number of points to use for estimating the entropy,
    with default $n_{\rm est} = 10000$; use 0 for the full set.

    *weights* is True for default weights, False for unweighted (using
    the distance to the kth neighbour only), or a vector of weights of
    length *k*.

    *gmm* is the number of gaussians to use to model the distribution using
    a gaussian mixture model.  Default is 0, and the points represent an
    empirical distribution.

    Returns entropy H in bits and its uncertainty.

    Berrett, T. B., Samworth, R.J., Yuan, M., 2016. Efficient multivariate
    entropy estimation via k-nearest neighbour distances.
    DOI:10.1214/18-AOS1688
    https://arxiv.org/abs/1606.00304
    """
    from sklearn.neighbors import NearestNeighbors
    n, d = points.shape

    # Default to 10000 points, or the full set if n_est is 0
    if n_est is None:
        n_est = 10000
    elif n_est == 0:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
        n_est = n
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    # Default k based on n
    if k is None:
        # Private communication: cube root of n is a good choice for k
        # Personal observation: k should be much bigger than d
        k = max(int(n**(1 / 3)), 3 * d)

    # If weights are given then use them (setting the appropriate k),
    # otherwise use the default weights.
    if isinstance(weights, bool):
        weights = _wnn_weights(k, d, weights)
    else:
        k = len(weights)
    #print("weights", weights, sum(weights))

    # select knn algorithm
    algorithm = 'auto'
    #algorithm = 'kd_tree'
    #algorithm = 'ball_tree'
    #algorithm = 'brute'

    n_components = 0 if gmm is None else gmm

    # H = 1/n sum_i=1^n sum_j=1^k w_j log E_{j,i}
    # E_{j,i} = e^-Psi(j) V_d (n-1) z_{j,i}^d = C z^d
    # logC = -Psi(j) + log(V_d) + log(n-1)
    # H = 1/n sum sum w_j logC + d/n sum sum w_j log(z)
    #   = sum w_j logC + d/n sum sum w_j log(z)
    #   = A + d/n B
    # H^2 = 1/n sum
    Psi = digamma(np.arange(1, k + 1))
    logVd = d / 2 * log(pi) - gammaln(1 + d / 2)
    logC = -Psi + logVd + log(n - 1)

    # TODO: standardizing points doesn't work.
    # Standardize the data so that distances conform.  This is equivalent to
    # a u-substitution u = sigma x + mu, so the integral needs to be corrected
    # for dU = det(sigma) dx.  Since the standardization squishes the
    # dimensions independently, sigma is a diagonal matrix, with the
    # determinant equal to the product of the diagonal elements.
    #x, mu, sigma = standardize(x)  # Note: sigma may be zero
    #detDU = np.prod(sigma)
    detDU = 1.

    if n_components > 0:
        # Use Gaussian mixture to model the distribution
        from sklearn.mixture import GaussianMixture as GMM
        predictor = GMM(n_components=gmm, covariance_type='full')
        predictor.fit(x)
        eval_x, _ = predictor.sample(n_est)
        #weight_x = predictor.score_samples(eval_x)
        skip = 0
    else:
        # Empirical distribution
        # TODO: should we use the full draw for kNN and a subset for eval points?
        # Choose a subset for evaluating the entropy estimate, if desired
        #print(n_est, n)
        #eval_x = x if n_est >= n else x[permutation(n)[:n_est]]
        eval_x = x
        #weight_x = 1
        skip = 1

    tree = NearestNeighbors(algorithm=algorithm, n_neighbors=k + skip)
    tree.fit(x)
    dist, _ind = tree.kneighbors(eval_x, n_neighbors=k + skip,
                                 return_distance=True)
    # Remove first column.  Since test points are in x, the first column will
    # be a point from x with distance 0, and can be ignored.
    if skip:
        dist = dist[:, skip:]
    # Find log distances.  This can be problematic for MCMC runs where a
    # step is rejected, and therefore identical points are in the distribution.
    # Ignore them by replacing these points with nan and using nanmean.
    # TODO: need proper analysis of duplicated points in MCMC chain
    dist[dist == 0] = nan
    logdist = log(dist)
    H_unweighted = logC + d * np.nanmean(logdist, axis=0)
    H = np.dot(H_unweighted, weights)[0]
    Hsq_k = np.nanmean((logC[-1] + d * logdist[:, -1])**2)
    # TODO: abs shouldn't be needed?
    if Hsq_k < H**2:
        print("warning: avg(H^2) < avg(H)^2")
    dH = sqrt(abs(Hsq_k - H**2) / n_est)
    #print("unweighted", H_unweighted)
    #print("weighted", H, Hsq_k, H**2, dH, detDU, LN2)
    return H * detDU / LN2, dH * detDU / LN2
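# Background sketch (hypothetical): with weights=False the estimator above
# reduces to the classic Kozachenko-Leonenko form using only the k-th
# neighbour distance,
#   H ~= -psi(k) + log(V_d) + log(n-1) + (d/n) * sum_i log(z_{i,k}),
# which matches logC above. A compact standalone version of that special case
# (result converted from nats to bits; duplicate points with zero distance
# are not handled here):
def _demo_kl_knn_entropy(points, k=4):
    import numpy as np
    from scipy.special import digamma, gammaln
    from sklearn.neighbors import NearestNeighbors

    n, d = points.shape
    nn = NearestNeighbors(n_neighbors=k + 1).fit(points)
    dist, _ = nn.kneighbors(points)  # column 0 is the query point itself
    z_k = dist[:, -1]                # distance to the k-th neighbour
    log_vd = (d / 2) * np.log(np.pi) - gammaln(1 + d / 2)
    h_nats = -digamma(k) + np.log(n - 1) + log_vd + (d / n) * np.sum(np.log(z_k))
    return h_nats / np.log(2)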
def gmm_entropy(points, n_est=None, n_components=None):
    r"""
    Use sklearn.mixture.BayesianGaussianMixture to estimate entropy.

    *points* are the data points in the sample.

    *n_est* are the number of points to use in the estimation; default is
    10,000 points, or 0 for all the points.

    *n_components* are the number of Gaussians in the mixture. Default is
    $5 \sqrt{d}$ where $d$ is the number of dimensions.

    Returns estimated entropy and uncertainty in the estimate.

    This method uses BayesianGaussianMixture from scikit-learn to build a
    model of the point distribution, then uses Monte Carlo sampling to
    determine the entropy of that distribution. The entropy uncertainty is
    computed from the variance in the MC sample scaled by the number of
    samples. This does not incorporate any uncertainty in the sampling that
    generated the point distribution or the uncertainty in the GMM used to
    model that distribution.
    """
    #from sklearn.mixture import GaussianMixture as GMM
    from sklearn.mixture import BayesianGaussianMixture as GMM
    n, d = points.shape

    # Default to 10000 points, or the full set if n_est is 0
    if n_est is None:
        n_est = 10000
    elif n_est == 0:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
        n_est = n
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    if n_components is None:
        n_components = int(5 * sqrt(d))

    ## Standardization doesn't seem to help
    ## Note: sigma may be zero
    #x, mu, sigma = standardize(x)   # if standardized
    predictor = GMM(
        n_components=n_components,
        covariance_type='full',
        #verbose=True,
        max_iter=1000)
    predictor.fit(x)
    eval_x, _ = predictor.sample(n_est)
    weight_x = predictor.score_samples(eval_x)
    H = -np.mean(weight_x)
    #with np.errstate(divide='ignore'):
    #    H = H + np.sum(np.log(sigma))   # if standardized
    dH = np.std(weight_x, ddof=1) / sqrt(n)
    ## cross-check against own calcs
    #alt = GaussianMixture(predictor.weights_, mu=predictor.means_, sigma=predictor.covariances_)
    #print("alt", H, alt.entropy())
    #print(np.vstack((weight_x[:10], alt.logpdf(eval_x[:10]))).T)
    return H / LN2, dH / LN2
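# Usage sketch (hypothetical): gmm_entropy relies on module-level helpers
# assumed to be in scope (permutation, sqrt, LN2), so it is only called here.
# A correlated gaussian sample has known entropy
# 0.5 * log2((2*pi*e)^d * det(Sigma)) bits, which the estimate should approach
# for a large draw (n_est defaults to 10000 evaluation points).
def _demo_gmm_entropy():
    import numpy as np
    rng = np.random.default_rng(4)
    sigma = np.array([[2.0, -0.5], [-0.5, 1.0]])
    points = rng.multivariate_normal([1.0, -1.0], sigma, size=20000)
    h_bits, dh_bits = gmm_entropy(points)
    expected = 0.5 * np.log2((2 * np.pi * np.e)**2 * np.linalg.det(sigma))
    print("estimated %.2f +/- %.2f bits, expected %.2f bits"
          % (h_bits, dh_bits, expected))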