Example #1
    def sample_cond_dist(self, Y, n_samples):
        """Return conditional samples from the Gaussian mixture model.

        Keyword arguments:
        Y -- A numpy vector with one entry per model dimension, holding either
            a fixed value or np.nan. For a two-dimensional model,
            Y = np.array([3, np.nan]) samples the GMM with the first
            dimension fixed at 3.
        n_samples -- Number of requested samples
        """

        # get the conditional distribution
        (con_means, con_covariances, con_weights) = self.cond_dist(Y)

        # sample from the conditional distribution
        samples = pypr_gmm.sample_gaussian_mixture(con_means, con_covariances,
                                                   con_weights, n_samples)

        # find the columns where the nans are
        nan_cols = np.where(np.isnan(Y))[0]

        # extend the input to the length of the samples
        full_samples = np.tile(Y, (n_samples, 1))

        # copy the samples into the nan columns
        full_samples[:, nan_cols] = samples

        return full_samples
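A hypothetical usage sketch (numpy as np and a fitted model object are assumed; neither the object nor its name is in the source): fix the first dimension at 3 and draw 100 conditional samples of the remaining dimension.

Y = np.array([3, np.nan])                           # fix x1 = 3, sample x2
samples = model.sample_cond_dist(Y, n_samples=100)  # 'model' is hypothetical
print(samples.shape)                                # (100, 2); column 0 is all 3s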
Example #2
import numpy as np
import pypr.clustering.gmm as gmm


def generateData(n):
    mc = [0.4, 0.4, 0.2]  # Mixing coefficients
    centroids = [np.array([0, 0]), np.array([3, 3]), np.array([0, 4])]
    ccov = [np.array([[1, 0.4], [0.4, 1]]), np.diag((1, 2)),
            np.diag((0.4, 0.1))]  # Covariance matrices

    # Generate samples from the Gaussian mixture model
    X = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=n)
    return X
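A quick usage sketch (the plotting part is illustrative, not from the source):

import matplotlib.pyplot as plt

X = generateData(1000)           # 1000 two-dimensional samples
plt.plot(X[:, 0], X[:, 1], '.')  # scatter of the three-component mixture
plt.show()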
Example #3
    def sample_cond_dist(self, Y, n_samples):

        # get the conditional distribution
        (con_means, con_covariances, con_weights) = self.cond_dist(Y)

        # sample from the conditional distribution
        samples = pypr_gmm.sample_gaussian_mixture(con_means, con_covariances,
                                                   con_weights, n_samples)

        # find the columns where the nans are
        nan_cols = np.where(np.isnan(Y))[0]

        # extend the input to the length of the samples
        full_samples = np.tile(Y, (n_samples, 1))

        # copy the samples into the nan columns
        full_samples[:, nan_cols] = samples

        return full_samples
Example #4
import numpy as np
import pypr.clustering.gmm as gmm


def generate_data(n_samples):
    mc = [0.4, 0.4, 0.2]  # Mixing coefficients
    centroids = [
        np.array([0, 0]),
        np.array([3, 3]),
        np.array([0, 4])
    ]
    ccov = [
        np.array([[1, 0.4], [0.4, 1]]),
        np.diag((1, 2)),
        np.diag((0.4, 0.1))
    ]

    # Generate samples from the Gaussian mixture model
    samples = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=n_samples)
    xs, ys = samples[:, 0], samples[:, 1]
    probs = np.zeros([n_samples], dtype=np.float32)
    for it in range(n_samples):
        input_ = np.array([xs[it], np.nan])
        con_cen, con_cov, new_p_k = gmm.cond_dist(input_, centroids, ccov, mc)
        prob = gmm.gmm_pdf(ys[it], con_cen, con_cov, new_p_k)
        probs[it] = prob
    return xs, ys, probs
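A short usage sketch (illustrative, not from the source): coloring each sample by its conditional density p(x2 | x1) makes the mixture structure visible.

import matplotlib.pyplot as plt

xs, ys, probs = generate_data(1000)
plt.scatter(xs, ys, c=probs, s=5)  # color-code samples by p(x2 | x1)
plt.colorbar()
plt.show()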
Example #5
import numpy as np
import pypr.clustering.gmm as gmm
import pypr.stattest as stattest
from matplotlib.pylab import plot

# Problem size (values assumed; the snippet does not define them):
D = 2
K_orig = 4
samples_pr_cluster = 250

cen_lst = []
cov_lst = []

# Generate cluster centers, covariance, and mixing coefficients:
sigma_scl = 0.1
X = np.zeros((samples_pr_cluster * K_orig, D))  # placeholder; overwritten below
for k in range(K_orig):
    mu = np.random.randn(D)
    sigma = np.eye(D) * sigma_scl
    cen_lst.append(mu)
    cov_lst.append(sigma)
mc = np.ones(K_orig) / K_orig  # All clusters equally probable

# Sample from the mixture:
N = 1000
X = gmm.sample_gaussian_mixture(cen_lst, cov_lst, mc, samples=N)

K_range = list(range(2, 10))
runs = 10
bic_table = np.zeros((len(K_range), runs))
for K_idx, K in enumerate(K_range):
    print("Clustering for K=%d" % K)
    for i in range(runs):
        cluster_init_kw = {'cluster_init': 'sample', 'max_init_iter': 5,
                           'cov_init': 'var', 'verbose': True}
        cen_lst, cov_lst, p_k, logL = gmm.em_gm(X, K=K, max_iter=1000,
                                                delta_stop=1e-2,
                                                init_kw=cluster_init_kw,
                                                verbose=True, max_tries=10)
        bic = stattest.bic_gmm(logL, N, D, K)
        bic_table[K_idx, i] = bic

plot(K_range, bic_table)
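To turn the table into a model choice, pick the K with the lowest mean BIC across runs; a minimal sketch (not in the source):

best_K = K_range[int(np.argmin(bic_table.mean(axis=1)))]
print("K with lowest mean BIC:", best_K)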
Example #6
    def generate_samples(self):
        self.samples = gmm.sample_gaussian_mixture(self.mean,
                                                   self.var,
                                                   self.weight,
                                                   samples=self.n_samples)
Example #7
    def sample(self, nsamples=1):
        '''
        Produce samples from the mixture.
        '''
        return gmm.sample_gaussian_mixture(self.mu, self.sigma, self.pi,
                                           nsamples)
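For context, a minimal wrapper class this method could live in (the class and its constructor are assumed for illustration; only the attribute names mu, sigma, and pi come from the snippet):

import numpy as np
import pypr.clustering.gmm as gmm

class GMMSampler:
    def __init__(self, mu, sigma, pi):
        self.mu = mu        # list of component mean vectors
        self.sigma = sigma  # list of component covariance matrices
        self.pi = pi        # mixing coefficients

    def sample(self, nsamples=1):
        return gmm.sample_gaussian_mixture(self.mu, self.sigma, self.pi,
                                           nsamples)

sampler = GMMSampler([np.array([0, 0])], [np.eye(2)], [1.0])
print(sampler.sample(5))  # five samples from a single-component "mixture"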
Example #8
# Drawing samples from a Gaussian Mixture Model
import numpy as np
from matplotlib import pyplot as plt
import pypr.clustering.gmm as gmm

mc = [0.4, 0.4, 0.2]  # Mixing coefficients
centroids = [np.array([0, 0]), np.array([3, 3]), np.array([0, 4])]
ccov = [np.array([[1, 0.4], [0.4, 1]]), np.diag((1, 2)),
        np.diag((0.4, 0.1))]  # Covariance matrices

X = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=1000)
plt.plot(X[:, 0], X[:, 1], '.')

# Draw an ellipse for each mixture component
for i in range(len(mc)):
    x1, x2 = gmm.gauss_ellipse_2d(centroids[i], ccov[i])
    plt.plot(x1, x2, 'k', linewidth=2)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
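As a follow-up sketch (not in the source, and assuming gmm_pdf accepts an (N, D) array of query points), the mixture density can be evaluated on a grid and drawn as contours over the scatter:

xx, yy = np.meshgrid(np.linspace(-3, 6, 200), np.linspace(-3, 7, 200))
grid = np.c_[xx.ravel(), yy.ravel()]
pdf = gmm.gmm_pdf(grid, centroids, ccov, mc).reshape(xx.shape)
plt.contour(xx, yy, pdf, colors='gray')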
Example #9

# Drawing samples from a Gaussian Mixture Model, then comparing training
# and validation log-likelihood as a function of K
import sys
import numpy as np
from matplotlib import pyplot as plt
import pypr.clustering.gmm as gmm
import pypr.stattest as stattest

np.random.seed(10)
mc = [0.4, 0.4, 0.2]  # Mixing coefficients
centroids = [np.array([0, 0]), np.array([3, 3]), np.array([0, 4])]
ccov = [np.array([[1, 0.4], [0.4, 1]]), np.diag((1, 2)),
        np.diag((0.4, 0.1))]  # Covariance matrices

T = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=500)  # training set
V = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=500)  # validation set
plt.plot(T[:, 0], T[:, 1], '.')

# Expectation-Maximization of Mixture of Gaussians
Krange = range(1, 20 + 1)
runs = 1
meanLogL_train = np.zeros((len(Krange), runs))
meanLogL_valid = np.zeros((len(Krange), runs))
for K in Krange:
    print("Clustering for K =", K)
    sys.stdout.flush()
    for r in range(runs):
        cen_lst, cov_lst, p_k, logL = gmm.em_gm(T, K=K, max_iter=100)
        meanLogL_train[K-1, r] = logL
        meanLogL_valid[K-1, r] = gmm.gm_log_likelihood(V, cen_lst, cov_lst, p_k)

fig1 = plt.figure()
plt.subplot(1, 2, 1)
for r in range(runs):
    # Loop body assumed; the source is truncated here. Plot per-run curves:
    plt.plot(Krange, meanLogL_train[:, r], 'b-')
    plt.plot(Krange, meanLogL_valid[:, r], 'r-')
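From these curves the natural model choice is the K that maximizes the mean validation log-likelihood; a one-line sketch (not in the source):

best_K = Krange[int(np.argmax(meanLogL_valid.mean(axis=1)))]
print("K with best validation log-likelihood:", best_K)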
Example #10

import numpy as np
import pypr.clustering.gmm as pygmm  # import and alias assumed from the usage below

if __name__ == '__main__':

    # Generate data from two Gaussians

    alpha = 0.05
    epsilon = 0.0
    mu = 0.0
    mu1 = mu - epsilon
    mu2 = mu + epsilon
    sigma1 = 0.05
    sigma2 = 0.05

    N = 1000

    # Sample data from two Gaussians
    X = pygmm.sample_gaussian_mixture([np.array([mu1]), np.array([mu2])],
                                      [[[sigma1]], [[sigma2]]],
                                      [alpha, 1. - alpha], samples=N)[:, 0]

    dx = 0.2
    deltaX = dx*np.random.randn(N)

    # Gradient with respect to epsilon
    f1 = lambda x, epsilon: (1. / (np.sqrt(2 * np.pi) * sigma1)
                             * np.exp(-0.5 * (x - mu + epsilon)**2. / sigma1**2.))
    f2 = lambda x, epsilon: (1. / (np.sqrt(2 * np.pi) * sigma2)
                             * np.exp(-0.5 * (x - mu - epsilon)**2. / sigma2**2.))
    h = lambda x, epsilon: alpha * f1(x, epsilon) + (1. - alpha) * f2(x, epsilon)
    g = lambda x, epsilon: ((1. - alpha) * f2(x, epsilon) * (x - mu - epsilon) / sigma2**2.
                            - alpha * f1(x, epsilon) * (x - mu + epsilon) / sigma1**2.)
    par_g_eps = lambda x, epsilon: (alpha * f1(x, epsilon) * ((x - mu + epsilon)**2. - sigma1**2.) / sigma1**4.
                                    + (1. - alpha) * f2(x, epsilon) * ((x - mu - epsilon)**2. - sigma2**2.) / sigma2**4.)
    par_g_x = lambda x, epsilon: (alpha * f1(x, epsilon) * (x - mu + epsilon)**2. / sigma1**4.
                                  - alpha * f1(x, epsilon) / sigma1**2.
                                  + (1. - alpha) * f2(x, epsilon) / sigma2**2.
                                  - (1. - alpha) * f2(x, epsilon) * (x - mu - epsilon)**2. / sigma2**4.)
    par_h_x = lambda x, epsilon: (-(1. - alpha) * f2(x, epsilon) * (x - mu - epsilon) / sigma2**2.
                                  - alpha * f1(x, epsilon) * (x - mu + epsilon) / sigma1**2.)

    par_ll_eps_fct = lambda epsilon, x: g(x, epsilon)/h(x, epsilon)
    par_ll_eps_sum_fct = lambda epsilon, x: np.sum(np.ma.masked_invalid(par_ll_eps_fct(epsilon, x)))
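Since analytic derivatives like g/h are easy to get wrong, a quick sanity check (hypothetical, appended at the end of the __main__ block) is to compare par_ll_eps_sum_fct against a central finite difference of the summed log-likelihood:

    eps0, d = 0.1, 1e-6
    loglik = lambda e: np.sum(np.ma.masked_invalid(np.log(h(X, e))))
    numeric = (loglik(eps0 + d) - loglik(eps0 - d)) / (2. * d)
    analytic = par_ll_eps_sum_fct(eps0, X)
    print("analytic:", analytic, "finite difference:", numeric)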
Example #11

import numpy as np
import pypr.clustering.gmm as gmm
import pypr.stattest as stattest
from matplotlib.pylab import plot

# Problem size (values assumed; the snippet does not define them):
D = 2
K_orig = 4
samples_pr_cluster = 250

cen_lst = []
cov_lst = []

# Generate cluster centers, covariance, and mixing coefficients:
sigma_scl = 0.1
X = np.zeros((samples_pr_cluster * K_orig, D))
for k in range(K_orig):
    mu = np.random.randn(D)
    sigma = np.eye(D) * sigma_scl
    cen_lst.append(mu)
    cov_lst.append(sigma)
mc = np.ones(K_orig) / K_orig  # All clusters equally probable

# Sample from the mixture:
N = 1000
X = gmm.sample_gaussian_mixture(cen_lst, cov_lst, mc, samples=N)

K_range = range(2, 10)
runs = 10
bic_table = np.zeros((len(K_range), runs))
for K_idx, K in enumerate(K_range):
    print "Clustering for K=%d" % K
    for i in range(runs):
        cluster_init_kw = {"cluster_init": "sample", "max_init_iter": 5, "cov_init": "var", "verbose": True}
        cen_lst, cov_lst, p_k, logL = gmm.em_gm(
            X, K=K, max_iter=1000, delta_stop=1e-2, init_kw=cluster_init_kw, verbose=True, max_tries=10
        )
        bic = stattest.bic_gmm(logL, N, D, K)
        bic_table[K_idx, i] = bic

plot(K_range, bic_table)
Example #12
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the 3d projection
import pypr.clustering.gmm as gmm


def plot_ellipses(cen_lst, cov_lst):
    # Function header assumed; the source preserves only this loop body.
    for i in range(len(cen_lst)):
        x, y = gmm.gauss_ellipse_2d(cen_lst[i], cov_lst[i])
        plt.plot(x, y, 'k', linewidth=0.5)

np.random.seed(1)
mc = [0.4, 0.4, 0.2]  # Mixing coefficients
centroids = [np.array([0, 0, 0]), np.array([3, 3, 2]), np.array([0, 4, 3])]
ccov = [
    np.array([[1, 0.4, 0.4], [0.4, 1, 0.4], [0.4, 0.4, 1]]),
    np.diag((1, 2, 0.4)),
    np.diag((0.4, 0.1, 1))
]

# Generate samples from the Gaussian mixture model
X = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=500)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], alpha=0.4)

# Expectation-Maximization of Mixture of Gaussians
cen_lst, cov_lst, p_k, logL = gmm.em_gm(X,
                                        K=4,
                                        max_iter=400,
                                        verbose=True,
                                        iter_call=None)
print "Log likelihood (how well the data fits the model) = ", logL