Example #1
    def gmm(data, k):
        """Cluster based on gaussian mixture models 
        
        Parameters
        ----------
        data : dict
            features structure
        k :  int
            number of clusters

        Returns
        -------
        cl : int array
            cluster indicies
       
        Notes
        -----
        This function requires scikits-learn
         
        """
        try:
            clf = mixture.GMM(n_states=k, cvtype='full')
        except TypeError:
            clf = mixture.GMM(n_components=k, cvtype='full')
        clf.fit(data)
        cl = clf.predict(data)
        return cl
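A minimal usage sketch (not part of the original snippet): despite the docstring's "dict", GMM.fit expects array-like input, so this assumes data is a NumPy array of shape (n_samples, n_features) and that scikits.learn's mixture module is importable:

import numpy as np
from scikits.learn import mixture

# two well-separated blobs in 2-D
data = np.vstack([np.random.randn(50, 2),
                  np.random.randn(50, 2) + 5.0])
cl = gmm(data, k=2)   # one cluster index per sample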
Example #2
def classify_speech(energy):
    """ Trains a two mixture GMM based on the energy component in each frame.
    
    Usage: energy_segmentation = classify_speech(energy)
    
    Inputs:
    energy -- Numpy array of energy in each frame
    
    Outputs:
    energy_segmentation -- Numpy array of detected speech (1=speech, 0=silence)
    """

    g = mixture.GMM(n_states=2)
    g.fit(energy)

    # Higher energy indicates speech; the speech component's position in the
    # GMM mean vector can vary with initial conditions
    if g.means[0] > g.means[1]:
        energy_tag = 0
        silence_tag = 1
    else:
        energy_tag = 1
        silence_tag = 0

    speech_predict = g.predict(energy)

    # 1 where speech was detected, 0 otherwise
    energy_segmentation = np.zeros(np.shape(energy), dtype=np.int)
    energy_segmentation[speech_predict == energy_tag] = 1
    energy_segmentation[speech_predict == silence_tag] = 0

    return energy_segmentation
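A hedged usage sketch with synthetic data: this assumes energy is an (n_frames, 1) array of per-frame energies, with speech frames carrying clearly higher energy than silence:

import numpy as np

# 100 low-energy (silence) frames followed by 100 high-energy (speech) frames
energy = np.vstack([np.random.rand(100, 1),
                    np.random.rand(100, 1) + 10.0])
seg = classify_speech(energy)   # 1 = speech, 0 = silence, one entry per frame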
Example #3
    def test_train(self, params='wmc'):
        g = mixture.GMM(self.n_states, self.cvtype)
        g.weights = self.weights
        g.means = self.means
        g._covars = 20 * self.covars[self.cvtype]

        # Create a training set by sampling from the predefined distribution.
        train_obs = g.rvs(n_samples=100)

        g.fit(train_obs, n_iter=0, init_params=params)

        # Do one training iteration at a time so we can keep track of
        # the log likelihood to make sure that it increases after each
        # iteration.
        trainll = []
        for iter in xrange(5):
            g.fit(train_obs,
                  n_iter=1,
                  params=params,
                  init_params='',
                  min_covar=1e-1)
            trainll.append(g.score(train_obs).sum())
        # Note that the log likelihood will sometimes decrease by a
        # very small amount after it has more or less converged due to
        # the addition of min_covar to the covariance (to prevent
        # underflow).  This is why the threshold is set to -0.5
        # instead of 0.
        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #5
def test_GMM_attributes():
    n_states, n_features = 10, 4
    cvtype = 'diag'
    g = mixture.GMM(n_states, cvtype)
    weights = np.random.rand(n_states)
    weights = weights / weights.sum()
    means = np.random.randint(-20, 20, (n_states, n_features))

    assert g.n_states == n_states
    assert g.cvtype == cvtype

    g.weights = weights
    assert_array_almost_equal(g.weights, weights)
    assert_raises(ValueError, g.__setattr__, 'weights', 2 * weights)
    assert_raises(ValueError, g.__setattr__, 'weights', [])
    assert_raises(ValueError, g.__setattr__, 'weights',
                  np.zeros((n_states - 2, n_features)))

    g.means = means
    assert_array_almost_equal(g.means, means)
    assert_raises(ValueError, g.__setattr__, 'means', [])
    assert_raises(ValueError, g.__setattr__, 'means',
                  np.zeros((n_states - 2, n_features)))

    covars = (0.1 + 2 * np.random.rand(n_states, n_features))**2
    g._covars = covars
    assert_array_almost_equal(g._covars, covars)
    assert_raises(ValueError, g.__setattr__, 'covars', [])
    assert_raises(ValueError, g.__setattr__, 'covars',
                  np.zeros((n_states - 2, n_features)))

    assert_raises(ValueError, mixture.GMM, n_states=20, cvtype='badcvtype')
Example #6
    def test_rvs(self, n=100):
        g = mixture.GMM(self.n_states, self.cvtype)
        # Make sure the means are far apart so posteriors.argmax()
        # picks the actual component used to generate the observations.
        g.means = 20 * self.means
        g._covars = np.maximum(self.covars[self.cvtype], 0.1)
        g.weights = self.weights

        samples = g.rvs(n)
        self.assertEquals(samples.shape, (n, self.n_features))
Example #7
def convert_type_db_gmm(u):
    """
    Given compressed numpy arrays that specify a GMM from the database,
    convert back to a scikits.learn mixture.GMM.
    """

    world = mixture.GMM(n_states=np.size(u.ubm_means, 0))
    world.means = u.ubm_means
    vars_diag = np.zeros((np.size(u.ubm_vars, 0), np.size(u.ubm_vars, 1)),
                         dtype=np.float)
    for i in range(np.size(vars_diag, 0)):
        vars_diag[i, :] = np.diag(u.ubm_vars[i])
    world.covars = vars_diag
    world.weights = np.reshape(u.ubm_weights, (np.size(u.ubm_weights, 0), ))

    return world
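A sketch of how this might be called, using a hypothetical record u whose attributes match what the function reads: ubm_means (n_states x n_features), ubm_vars (a stack of full covariance matrices, of which only the diagonals are kept), and ubm_weights:

import numpy as np

class DbRecord(object):   # hypothetical stand-in for the database row
    pass

u = DbRecord()
u.ubm_means = np.random.randn(4, 3)                    # 4 states, 3 features
u.ubm_vars = np.array([np.eye(3) for i in range(4)])   # full covariances
u.ubm_weights = np.ones((4, 1)) / 4.0
world = convert_type_db_gmm(u)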
Example #8
    def test_eval(self):
        g = mixture.GMM(self.n_states, self.cvtype)
        # Make sure the means are far apart so posteriors.argmax()
        # picks the actual component used to generate the observations.
        g.means = 20 * self.means
        g._covars = self.covars[self.cvtype]
        g.weights = self.weights

        gaussidx = np.repeat(range(self.n_states), 5)
        nobs = len(gaussidx)
        obs = np.random.randn(nobs, self.n_features) + g.means[gaussidx]

        ll, posteriors = g.eval(obs)

        self.assertEqual(len(ll), nobs)
        self.assertEqual(posteriors.shape, (nobs, self.n_states))
        assert_array_almost_equal(posteriors.sum(axis=1), np.ones(nobs))
        assert_array_equal(posteriors.argmax(axis=1), gaussidx)
Example #9
def create_random_gmm(n_mix, n_features, cvtype):
    from scikits.learn import mixture

    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = np.random.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * np.random.rand(n_mix))**2,
        'tied':
        _generate_random_spd_matrix(n_features) + mincv * np.eye(n_features),
        'diag': (mincv + mincv * np.random.rand(n_mix, n_features))**2,
        'full':
        np.array([
            _generate_random_spd_matrix(n_features) +
            mincv * np.eye(n_features) for x in xrange(n_mix)
        ])
    }[cvtype]
    g.weights = hmm.normalize(np.random.rand(n_mix))
    return g
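Assuming the helpers it uses (_generate_random_spd_matrix and hmm.normalize, both from the surrounding test module) are in scope, a quick sketch:

g = create_random_gmm(n_mix=3, n_features=2, cvtype='full')
samples = g.rvs(20)   # 20 draws from the random mixture, shape (20, 2)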
Example #10
def map_adapt_speaker(obs, world, r_m=16.0, r_w=16.0):
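    """ MAP-adapts the weights and means of a trained world (UBM) GMM.

    Usage: w_hat, u_hat = map_adapt_speaker(obs, world, r_m, r_w)

    Inputs:
    obs -- MxN Numpy array of observations (M frames in N-dimensional space)
    world -- Trained world model (scikits.learn mixture.GMM)
    r_m -- Relevance factor for the mean update
    r_w -- Relevance factor for the weight update

    Outputs:
    w_hat -- Adapted mixture weights
    u_hat -- Adapted mixture means (world itself is left unmodified)
    """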

    # Catch fatal errors
    assert isinstance(world, mixture.GMM)
    assert np.size(obs, 1) == np.size(world.means, 1)
    assert np.size(obs, 0) > 1

    # Cast to float in case ints were passed in
    r_m = float(r_m)
    r_w = float(r_w)

    # Posterior probabilities for X = {x_1, ..., x_T}:
    #   P(i|x_t) = w_i * p_i(x_t) / sum_{j=1}^{M} w_j * p_j(x_t)

    posterior = world.predict_proba(obs)
    #print np.shape(posterior)

    n_i = np.sum(posterior, 0)
    #print np.shape(n_i)
    E_i = np.zeros((world.n_states, np.size(obs, 1)), dtype=np.float)
    for i in range(world.n_states):
        #print np.shape(posterior[:,i]), np.shape(obs), np.shape(n_i[i])
        post_i = np.reshape(posterior[:, i], (np.size(obs, 0), 1))
        #print np.shape(post_i)
        E_i[i, :] = np.sum(post_i * obs, 0) / n_i[i]

    a_i_mean = np.reshape(n_i / (n_i + r_m), (world.n_states, 1))
    a_i_weight = n_i / (n_i + r_w)

    T = float(np.size(obs, 0))

    w_hat = a_i_weight * n_i / T + (1.0 - a_i_weight) * world.weights
    w_hat = w_hat / np.sum(w_hat)

    #print np.shape(E_i), np.shape(world.means), np.shape(a_i_mean)
    u_hat = a_i_mean * E_i + (1.0 - a_i_mean) * world.means

    return w_hat, u_hat
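A hedged end-to-end sketch (mixture count and feature shapes are illustrative only): train a world model, then MAP-adapt it to one speaker's frames:

import numpy as np
from scikits.learn import mixture

world = mixture.GMM(n_states=8)
world.fit(np.random.randn(500, 12))        # stand-in UBM training frames
speaker_obs = np.random.randn(200, 12)     # one speaker's feature frames
w_hat, u_hat = map_adapt_speaker(speaker_obs, world)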
Example #11
def train_gmm(obs, world=None, params={'num_mixtures': 1024}):
    """ Trains a GMM using N mixtures from given observations.
    
    Usage:  world = train_gmm(obs, world, params)
    
    Inputs:
    obs -- MxN Numpy array of observations (M observations to be clustered in N-dimensional space)
    world -- Previous world model to use for initialization (if not supplied, will initialize automagically)
    params -- {'num_mixtures': X} Use X mixtures in model, defaults to 1024
    
    Outputs:
    world -- GMM class output (contains means, weights, covars)
    """    
    
    if log(params['num_mixtures'], 2) % 1:
        logging.warn('Number of mixtures is not a power of 2; training will be slow.')
    
    if not world:
        world = mixture.GMM(n_states=params['num_mixtures'])
        world.fit(obs)
    else:
        world.fit(obs, init_params='')
    
    return world
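A minimal sketch of the two calling patterns the docstring describes, with placeholder feature arrays:

import numpy as np

obs = np.random.randn(5000, 13)                        # e.g. MFCC frames
world = train_gmm(obs, params={'num_mixtures': 64})    # fresh model
more_obs = np.random.randn(5000, 13)
world = train_gmm(more_obs, world)                     # refine the existing model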
Example #12
features = shapeAnalysis(mask)
features += colorAnalysis(hsv, mask)
#features += featurePoints(greyscaleImg, mask)
print "features : ", features

nbFeatures = len(features)

n, m = 100, 2

# generate random sample, two components
np.random.seed(0)
C = np.random.randn(n, nbFeatures)
X = np.r_[np.dot(np.random.randn(nbFeatures, n), C),
          np.random.randn(n, nbFeatures) + C]

clf = mixture.GMM(n_states=10, cvtype='tied')
clf.fit(X)
print clf.decode(X)

splot = pl.subplot(111, aspect='equal')
color_iter = itertools.cycle(['r', 'g', 'b', 'c'])

Y_ = clf.predict(X)

for i, (mean, covar, color) in enumerate(zip(clf.means, clf.covars,
                                             color_iter)):
    v, w = np.linalg.eigh(covar)
    u = w[0] / np.linalg.norm(w[0])
    pl.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
    angle = np.arctan(u[1] / u[0])
    angle = 180 * angle / np.pi  # convert to degrees
Example #13
import numpy as np
from scikits.learn import mixture
import itertools

import pylab as pl
import matplotlib as mpl

n, m = 300, 2

# generate random sample, two components
np.random.seed(0)
C = np.array([[0., -0.7], [3.5, .7]])
X = np.r_[np.dot(np.random.randn(n, 2), C),
          np.random.randn(n, 2) + np.array([3, 3])]

clf = mixture.GMM(n_states=2, cvtype='full')
clf.fit(X)

splot = pl.subplot(111, aspect='equal')
color_iter = itertools.cycle(['r', 'g', 'b', 'c'])

Y_ = clf.predict(X)

for i, (mean, covar, color) in enumerate(zip(clf.means, clf.covars,
                                             color_iter)):
    v, w = np.linalg.eigh(covar)
    u = w[0] / np.linalg.norm(w[0])
    pl.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
    angle = np.arctan(u[1] / u[0])
    angle = 180 * angle / np.pi  # convert to degrees
    ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)