def gibbs_resampling_EM(self, iter_n=1):
    from sklearn.mixture import BayesianGaussianMixture
    from sklearn.cluster import KMeans

    self.itr = 0
    Np = len(self.Rot)
    for _ in range(iter_n):
        # Model the particle (x, y) positions with a Bayesian GMM and draw a
        # fresh position for each particle; jitter the heading by a small
        # gaussian plus an occasional 90/180/270 degree flip.
        EM = BayesianGaussianMixture(n_components=10)
        EM.fit(self.pf_debug[:, 0:2])
        for i in range(Np):
            sample, _ = EM.sample()
            xy = np.squeeze(sample)
            self.Rot[i].theta += 0.5 * np.random.randn() \
                + 90.0 * np.random.choice(4, p=[0.8, 0.05, 0.1, 0.05])
            self.Rot[i].x = xy[0]
            self.Rot[i].y = xy[1]

        self.likelihood_PF()
        W = self.scores / np.sum(self.scores)  # normalized scores for resampling
        Np = len(self.Rot)
        index = np.random.choice(a=Np, size=Np, p=W)  # resample by score
        Rot_arr = self.Rot  # temporary array for the new samples

        # Cluster the particle headings and redraw each heading from a
        # randomly chosen cluster center plus a small perturbation; the
        # score-based index above is overwritten by this uniform choice.
        kmeans = KMeans(n_clusters=4, init='k-means++', max_iter=300,
                        n_init=10, random_state=0)
        kmeans.fit(self.pf_debug[:, 2].reshape((-1, 1)))
        index = np.random.choice(a=4, size=Np)
        for i, idx in enumerate(index):
            self.Rot[i].theta = np.squeeze(kmeans.cluster_centers_[idx]) \
                + 0.5 * np.random.randn()
            self.Rot[i].x += np.random.normal(0, 0.01)
            self.Rot[i].y += np.random.normal(0, 0.01)
    print('resample done')
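# Usage sketch (illustrative, not part of the original class): a standalone
# rehearsal of the two resampling ingredients used above, namely a
# BayesianGaussianMixture fit to particle (x, y) positions and a KMeans fit
# to particle headings. The synthetic particle array and the _demo_* name are
# hypothetical stand-ins for self.Rot / self.pf_debug.
def _demo_gibbs_resampling_em():
    import numpy as np
    from sklearn.mixture import BayesianGaussianMixture
    from sklearn.cluster import KMeans

    rng = np.random.default_rng(0)
    # synthetic particles: columns are x, y, theta
    particles = np.column_stack([
        rng.normal(0.0, 1.0, size=200),
        rng.normal(0.0, 1.0, size=200),
        rng.choice([0.0, 90.0, 180.0, 270.0], size=200),
    ])

    # redraw positions from a GMM fit to (x, y)
    em = BayesianGaussianMixture(n_components=10)
    em.fit(particles[:, 0:2])
    new_xy, _ = em.sample(len(particles))

    # redraw headings from KMeans cluster centers plus a small perturbation
    km = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=0)
    km.fit(particles[:, 2].reshape(-1, 1))
    idx = rng.integers(0, 4, size=len(particles))
    new_theta = km.cluster_centers_[idx, 0] + 0.5 * rng.standard_normal(len(particles))

    return np.column_stack([new_xy, new_theta])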
def kde_entropy_sklearn_gmm(points, n_est=None, n_components=None):
    """
    Use sklearn.neighbors.KernelDensity pdf to estimate entropy.

    Data is standardized before kde.

    Sample points drawn from gaussian mixture model from original points.

    Fails for bimodal and dirichlet, similar to statsmodels kde.
    """
    from sklearn.mixture import BayesianGaussianMixture as GMM
    n, d = points.shape

    # Default to the full set
    if n_est is None:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    if n_components is None:
        n_components = int(5 * sqrt(d))

    predictor = GMM(
        n_components=n_components,
        covariance_type='full',
        #verbose=True,
        max_iter=1000)
    predictor.fit(x)
    evaluation_points, _ = predictor.sample(n_est)

    logp = sklearn_log_density(x, evaluation_points=evaluation_points)
    H = -np.mean(logp)
    return H / LN2
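# Usage sketch (hypothetical): kde_entropy_sklearn_gmm relies on module-level
# helpers assumed to be in scope (permutation, sqrt, LN2, sklearn_log_density),
# so it is only called here, not reimplemented. For a d-dimensional standard
# normal the entropy is d/2 * log2(2*pi*e) bits, which gives a quick sanity
# check on the estimate.
def _demo_kde_entropy_sklearn_gmm():
    import numpy as np
    points = np.random.default_rng(1).standard_normal((4000, 2))
    h_bits = kde_entropy_sklearn_gmm(points, n_est=2000)
    expected = 0.5 * 2 * np.log2(2 * np.pi * np.e)  # ~4.09 bits for d=2
    print("estimated %.2f bits, expected %.2f bits" % (h_bits, expected))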
def gmm_entropy(points, n_est=None, n_components=None):
    #from sklearn.mixture import GaussianMixture as GMM
    from sklearn.mixture import BayesianGaussianMixture as GMM
    n, d = points.shape

    # Default to the full set
    if n_est is None:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    if n_components is None:
        n_components = int(5*sqrt(d))

    ## Standardization doesn't seem to help
    ## Note: sigma may be zero
    #x, mu, sigma = standardize(x)   # if standardized
    predictor = GMM(n_components=n_components, covariance_type='full',
                    #verbose=True,
                    max_iter=1000)
    predictor.fit(x)
    eval_x, _ = predictor.sample(n_est)
    weight_x = predictor.score_samples(eval_x)
    H = -np.mean(weight_x)
    #with np.errstate(divide='ignore'):
    #    H = H + np.sum(np.log(sigma))   # if standardized
    dH = 0.
    ## cross-check against own calcs
    #alt = GaussianMixture(predictor.weights_, mu=predictor.means_, sigma=predictor.covariances_)
    #print("alt", H, alt.entropy())
    #print(np.vstack((weight_x[:10], alt.logpdf(eval_x[:10]))).T)
    return H / LN2, dH / LN2
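# Background sketch (hypothetical): the estimate above is Monte Carlo
# integration of H = -E[log p(x)], drawing points from the fitted mixture and
# averaging the negative log density. A self-contained version of that
# identity using sklearn's plain GaussianMixture on a known 1-D gaussian:
def _demo_mc_entropy_identity():
    import numpy as np
    from sklearn.mixture import GaussianMixture

    rng = np.random.default_rng(2)
    data = rng.normal(0.0, 2.0, size=(5000, 1))  # N(0, 2^2), entropy ~3.05 bits

    gm = GaussianMixture(n_components=1, covariance_type='full').fit(data)
    draws, _ = gm.sample(5000)
    h_bits = -np.mean(gm.score_samples(draws)) / np.log(2)

    expected = 0.5 * np.log2(2 * np.pi * np.e * 2.0**2)
    print("estimated %.2f bits, expected %.2f bits" % (h_bits, expected))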
def wnn_entropy(points, k=None, weights=True, n_est=None, gmm=None):
    r"""
    Weighted Kozachenko-Leonenko nearest-neighbour entropy calculation.

    *k* is the number of neighbours to consider, with default $k=n^{1/3}$

    *n_est* is the number of points to use for estimating the entropy,
    with default $n_{\rm est} = n$

    *weights* is True for default weights, False for unweighted (using
    the distance to the kth neighbour only), or a vector of weights of
    length *k*.

    *gmm* is the number of gaussians to use to model the distribution using
    a gaussian mixture model.  Default is 0, and the points represent an
    empirical distribution.

    Returns entropy H in bits and its uncertainty.

    Berrett, T. B., Samworth, R.J., Yuan, M., 2016. Efficient multivariate
    entropy estimation via k-nearest neighbour distances.
    https://arxiv.org/abs/1606.00304
    """
    from sklearn.neighbors import NearestNeighbors
    n, d = points.shape

    # Default to the full set
    if n_est is None:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    # Default k based on n
    if k is None:
        # Private communication: cube root of n is a good choice for k
        # Personal observation: k should be much bigger than d
        k = max(int(n**(1/3)), 3*d)

    # If weights are given then use them (setting the appropriate k),
    # otherwise use the default weights.
    if isinstance(weights, bool):
        weights = _wnn_weights(k, d, weights)
    else:
        k = len(weights)
    #print("weights", weights, sum(weights))

    # select knn algorithm
    algorithm = 'auto'
    #algorithm = 'kd_tree'
    #algorithm = 'ball_tree'
    #algorithm = 'brute'

    n_components = 0 if gmm is None else gmm

    # H = 1/n sum_i=1^n sum_j=1^k w_j log E_{j,i}
    # E_{j,i} = e^-Psi(j) V_d (n-1) z_{j,i}^d = C z^d
    # logC = -Psi(j) + log(V_d) + log(n-1)
    # H = 1/n sum sum w_j logC + d/n sum sum w_j log(z)
    #   = sum w_j logC + d/n sum sum w_j log(z)
    #   = A + d/n B
    # H^2 = 1/n sum
    Psi = digamma(np.arange(1, k+1))
    logVd = d/2*log(pi) - gammaln(1 + d/2)
    logC = -Psi + logVd + log(n-1)

    # TODO: standardizing points doesn't work.
    # Standardize the data so that distances conform.  This is equivalent to
    # a u-substitution u = sigma x + mu, so the integral needs to be corrected
    # for dU = det(sigma) dx.  Since the standardization squishes the
    # dimensions independently, sigma is a diagonal matrix, with the
    # determinant equal to the product of the diagonal elements.
    #x, mu, sigma = standardize(x)  # Note: sigma may be zero
    #detDU = np.prod(sigma)
    detDU = 1.

    if n_components > 0:
        # Use Gaussian mixture to model the distribution
        from sklearn.mixture import GaussianMixture as GMM
        predictor = GMM(n_components=gmm, covariance_type='full')
        predictor.fit(x)
        eval_x, _ = predictor.sample(n_est)
        #weight_x = predictor.score_samples(eval_x)
        skip = 0
    else:
        # Empirical distribution
        # TODO: should we use the full draw for kNN and a subset for eval points?
        # Choose a subset for evaluating the entropy estimate, if desired
        #print(n_est, n)
        #eval_x = x if n_est >= n else x[permutation(n)[:n_est]]
        eval_x = x
        #weight_x = 1
        skip = 1

    tree = NearestNeighbors(algorithm=algorithm, n_neighbors=k+skip)
    tree.fit(x)
    dist, _ind = tree.kneighbors(eval_x, n_neighbors=k+skip,
                                 return_distance=True)
    # Remove first column.  Since test points are in x, the first column will
    # be a point from x with distance 0, and can be ignored.
    if skip:
        dist = dist[:, skip:]
    # Find log distances.  This can be problematic for MCMC runs where a
    # step is rejected, and therefore identical points are in the distribution.
    # Ignore them by replacing these points with nan and using nanmean.
    # TODO: need proper analysis of duplicated points in MCMC chain
    dist[dist == 0] = nan
    logdist = log(dist)
    H_unweighted = logC + d*np.nanmean(logdist, axis=0)
    H = np.dot(H_unweighted, weights)[0]
    Hsq_k = np.nanmean((logC[-1] + d*logdist[:, -1])**2)
    # TODO: abs shouldn't be needed?
    if Hsq_k < H**2:
        print("warning: avg(H^2) < avg(H)^2")
    dH = sqrt(abs(Hsq_k - H**2)/n_est)
    #print("unweighted", H_unweighted)
    #print("weighted", H, Hsq_k, H**2, dH, detDU, LN2)
    return H * detDU / LN2, dH * detDU / LN2
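# Usage sketch (hypothetical): wnn_entropy depends on module-level helpers
# assumed to be in scope (_wnn_weights, permutation, digamma, gammaln, log,
# nan, sqrt, pi, LN2), so it is only called here. A correlated gaussian draw
# has known entropy 0.5 * log2((2*pi*e)^d * det(Sigma)) bits to compare
# against.
def _demo_wnn_entropy():
    import numpy as np
    rng = np.random.default_rng(3)
    sigma = np.array([[1.0, 0.6], [0.6, 2.0]])
    points = rng.multivariate_normal([0.0, 0.0], sigma, size=5000)
    h_bits, dh_bits = wnn_entropy(points, n_est=2000)
    expected = 0.5 * np.log2((2 * np.pi * np.e)**2 * np.linalg.det(sigma))
    print("estimated %.2f +/- %.2f bits, expected %.2f bits"
          % (h_bits, dh_bits, expected))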
def wnn_entropy(points, k=None, weights=True, n_est=None, gmm=None):
    r"""
    Weighted Kozachenko-Leonenko nearest-neighbour entropy calculation.

    *k* is the number of neighbours to consider, with default $k=n^{1/3}$

    *n_est* is the number of points to use for estimating the entropy,
    with default $n_{\rm est} = 10000$; use 0 for the full set.

    *weights* is True for default weights, False for unweighted (using
    the distance to the kth neighbour only), or a vector of weights of
    length *k*.

    *gmm* is the number of gaussians to use to model the distribution using
    a gaussian mixture model.  Default is 0, and the points represent an
    empirical distribution.

    Returns entropy H in bits and its uncertainty.

    Berrett, T. B., Samworth, R.J., Yuan, M., 2016. Efficient multivariate
    entropy estimation via k-nearest neighbour distances.
    DOI:10.1214/18-AOS1688
    https://arxiv.org/abs/1606.00304
    """
    from sklearn.neighbors import NearestNeighbors
    n, d = points.shape

    # Default to 10000 points, or the full set if n_est is 0
    if n_est is None:
        n_est = 10000
    elif n_est == 0:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
        n_est = n
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    # Default k based on n
    if k is None:
        # Private communication: cube root of n is a good choice for k
        # Personal observation: k should be much bigger than d
        k = max(int(n**(1 / 3)), 3 * d)

    # If weights are given then use them (setting the appropriate k),
    # otherwise use the default weights.
    if isinstance(weights, bool):
        weights = _wnn_weights(k, d, weights)
    else:
        k = len(weights)
    #print("weights", weights, sum(weights))

    # select knn algorithm
    algorithm = 'auto'
    #algorithm = 'kd_tree'
    #algorithm = 'ball_tree'
    #algorithm = 'brute'

    n_components = 0 if gmm is None else gmm

    # H = 1/n sum_i=1^n sum_j=1^k w_j log E_{j,i}
    # E_{j,i} = e^-Psi(j) V_d (n-1) z_{j,i}^d = C z^d
    # logC = -Psi(j) + log(V_d) + log(n-1)
    # H = 1/n sum sum w_j logC + d/n sum sum w_j log(z)
    #   = sum w_j logC + d/n sum sum w_j log(z)
    #   = A + d/n B
    # H^2 = 1/n sum
    Psi = digamma(np.arange(1, k + 1))
    logVd = d / 2 * log(pi) - gammaln(1 + d / 2)
    logC = -Psi + logVd + log(n - 1)

    # TODO: standardizing points doesn't work.
    # Standardize the data so that distances conform.  This is equivalent to
    # a u-substitution u = sigma x + mu, so the integral needs to be corrected
    # for dU = det(sigma) dx.  Since the standardization squishes the
    # dimensions independently, sigma is a diagonal matrix, with the
    # determinant equal to the product of the diagonal elements.
    #x, mu, sigma = standardize(x)  # Note: sigma may be zero
    #detDU = np.prod(sigma)
    detDU = 1.

    if n_components > 0:
        # Use Gaussian mixture to model the distribution
        from sklearn.mixture import GaussianMixture as GMM
        predictor = GMM(n_components=gmm, covariance_type='full')
        predictor.fit(x)
        eval_x, _ = predictor.sample(n_est)
        #weight_x = predictor.score_samples(eval_x)
        skip = 0
    else:
        # Empirical distribution
        # TODO: should we use the full draw for kNN and a subset for eval points?
        # Choose a subset for evaluating the entropy estimate, if desired
        #print(n_est, n)
        #eval_x = x if n_est >= n else x[permutation(n)[:n_est]]
        eval_x = x
        #weight_x = 1
        skip = 1

    tree = NearestNeighbors(algorithm=algorithm, n_neighbors=k + skip)
    tree.fit(x)
    dist, _ind = tree.kneighbors(eval_x, n_neighbors=k + skip,
                                 return_distance=True)
    # Remove first column.  Since test points are in x, the first column will
    # be a point from x with distance 0, and can be ignored.
    if skip:
        dist = dist[:, skip:]
    # Find log distances.  This can be problematic for MCMC runs where a
    # step is rejected, and therefore identical points are in the distribution.
    # Ignore them by replacing these points with nan and using nanmean.
    # TODO: need proper analysis of duplicated points in MCMC chain
    dist[dist == 0] = nan
    logdist = log(dist)
    H_unweighted = logC + d * np.nanmean(logdist, axis=0)
    H = np.dot(H_unweighted, weights)[0]
    Hsq_k = np.nanmean((logC[-1] + d * logdist[:, -1])**2)
    # TODO: abs shouldn't be needed?
    if Hsq_k < H**2:
        print("warning: avg(H^2) < avg(H)^2")
    dH = sqrt(abs(Hsq_k - H**2) / n_est)
    #print("unweighted", H_unweighted)
    #print("weighted", H, Hsq_k, H**2, dH, detDU, LN2)
    return H * detDU / LN2, dH * detDU / LN2
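# Background sketch (hypothetical): with weights=False the estimator above
# reduces to the classic Kozachenko-Leonenko form using only the k-th
# neighbour distance,
#   H ~= -psi(k) + log(V_d) + log(n-1) + (d/n) * sum_i log(z_{i,k}),
# which matches logC above. A compact standalone version of that special case
# (result converted from nats to bits; duplicate points with zero distance
# are not handled here):
def _demo_kl_knn_entropy(points, k=4):
    import numpy as np
    from scipy.special import digamma, gammaln
    from sklearn.neighbors import NearestNeighbors

    n, d = points.shape
    nn = NearestNeighbors(n_neighbors=k + 1).fit(points)
    dist, _ = nn.kneighbors(points)  # column 0 is the query point itself
    z_k = dist[:, -1]                # distance to the k-th neighbour
    log_vd = (d / 2) * np.log(np.pi) - gammaln(1 + d / 2)
    h_nats = -digamma(k) + np.log(n - 1) + log_vd + (d / n) * np.sum(np.log(z_k))
    return h_nats / np.log(2)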
def gmm_entropy(points, n_est=None, n_components=None):
    r"""
    Use sklearn.mixture.BayesianGaussianMixture to estimate entropy.

    *points* are the data points in the sample.

    *n_est* are the number of points to use in the estimation; default is
    10,000 points, or 0 for all the points.

    *n_components* are the number of Gaussians in the mixture. Default is
    $5 \sqrt{d}$ where $d$ is the number of dimensions.

    Returns estimated entropy and uncertainty in the estimate.

    This method uses BayesianGaussianMixture from scikit-learn to build a
    model of the point distribution, then uses Monte Carlo sampling to
    determine the entropy of that distribution. The entropy uncertainty is
    computed from the variance in the MC sample scaled by the number of
    samples. This does not incorporate any uncertainty in the sampling that
    generated the point distribution or the uncertainty in the GMM used to
    model that distribution.
    """
    #from sklearn.mixture import GaussianMixture as GMM
    from sklearn.mixture import BayesianGaussianMixture as GMM
    n, d = points.shape

    # Default to 10000 points, or the full set if n_est is 0
    if n_est is None:
        n_est = 10000
    elif n_est == 0:
        n_est = n

    # reduce size of draw to n_est
    if n_est >= n:
        x = points
        n_est = n
    else:
        x = points[permutation(n)[:n_est]]
        n = n_est

    if n_components is None:
        n_components = int(5 * sqrt(d))

    ## Standardization doesn't seem to help
    ## Note: sigma may be zero
    #x, mu, sigma = standardize(x)   # if standardized
    predictor = GMM(
        n_components=n_components,
        covariance_type='full',
        #verbose=True,
        max_iter=1000)
    predictor.fit(x)
    eval_x, _ = predictor.sample(n_est)
    weight_x = predictor.score_samples(eval_x)
    H = -np.mean(weight_x)
    #with np.errstate(divide='ignore'):
    #    H = H + np.sum(np.log(sigma))   # if standardized
    dH = np.std(weight_x, ddof=1) / sqrt(n)
    ## cross-check against own calcs
    #alt = GaussianMixture(predictor.weights_, mu=predictor.means_, sigma=predictor.covariances_)
    #print("alt", H, alt.entropy())
    #print(np.vstack((weight_x[:10], alt.logpdf(eval_x[:10]))).T)
    return H / LN2, dH / LN2
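# Usage sketch (hypothetical): gmm_entropy relies on module-level helpers
# assumed to be in scope (permutation, sqrt, LN2), so it is only called here.
# A correlated gaussian sample has known entropy
# 0.5 * log2((2*pi*e)^d * det(Sigma)) bits, which the estimate should approach
# for a large draw (n_est defaults to 10000 evaluation points).
def _demo_gmm_entropy():
    import numpy as np
    rng = np.random.default_rng(4)
    sigma = np.array([[2.0, -0.5], [-0.5, 1.0]])
    points = rng.multivariate_normal([1.0, -1.0], sigma, size=20000)
    h_bits, dh_bits = gmm_entropy(points)
    expected = 0.5 * np.log2((2 * np.pi * np.e)**2 * np.linalg.det(sigma))
    print("estimated %.2f +/- %.2f bits, expected %.2f bits"
          % (h_bits, dh_bits, expected))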