def gmm(data, k):
    """Cluster based on Gaussian mixture models

    Parameters
    ----------
    data : dict
        features structure
    k : int
        number of clusters

    Returns
    -------
    cl : int array
        cluster indices

    Notes
    -----
    This function requires scikits.learn
    """
    # Older releases use n_states; newer ones renamed it n_components.
    try:
        clf = mixture.GMM(n_states=k, cvtype='full')
    except TypeError:
        clf = mixture.GMM(n_components=k, cvtype='full')
    clf.fit(data)
    cl = clf.predict(data)
    return cl
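# A minimal usage sketch for gmm() above, on synthetic two-cluster data.
# It assumes numpy as np and scikits.learn's mixture module are imported
# as in the surrounding snippets; _demo_gmm itself is illustrative only.
def _demo_gmm():
    feats = np.vstack([np.random.randn(50, 3),
                       np.random.randn(50, 3) + 5.0])
    cl = gmm(feats, k=2)
    # cl holds one cluster index per observation (row) of feats
    return cl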
def classify_speech(energy):
    """
    Trains a two-mixture GMM based on the energy component in each frame.

    Usage: energy_segmentation = classify_speech(energy)

    Inputs:
        energy -- Numpy array of energy in each frame

    Outputs:
        energy_segmentation -- Numpy array of detected speech
                               (1=speech, 0=silence)
    """
    g = mixture.GMM(n_states=2)
    g.fit(energy)
    # Higher energy indicates speech; its location in the GMM mean
    # vector can vary based on initial conditions
    if g.means[0] > g.means[1]:
        energy_tag = 0
        silence_tag = 1
    else:
        energy_tag = 1
        silence_tag = 0
    speech_predict = g.predict(energy)
    # Return 1 where speech is detected, 0 otherwise
    energy_segmentation = np.zeros(np.shape(energy), dtype=np.int)
    energy_segmentation[speech_predict == energy_tag] = 1
    energy_segmentation[speech_predict == silence_tag] = 0
    return energy_segmentation
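# Illustrative call to classify_speech() on synthetic frame energies:
# quiet "silence" frames followed by loud "speech" frames. The data and
# the _demo helper name are made up for demonstration.
def _demo_classify_speech():
    energy = np.vstack([np.abs(np.random.randn(100, 1)) * 0.1,
                        np.abs(np.random.randn(100, 1)) * 5.0])
    seg = classify_speech(energy)
    # seg is 1 for frames classified as speech, 0 for silence
    return seg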
def test_train(self, params='wmc'):
    g = mixture.GMM(self.n_states, self.cvtype)
    g.weights = self.weights
    g.means = self.means
    g._covars = 20 * self.covars[self.cvtype]

    # Create a training set by sampling from the predefined distribution.
    train_obs = g.rvs(n_samples=100)
    g.fit(train_obs, n_iter=0, init_params=params)

    # Do one training iteration at a time so we can keep track of
    # the log likelihood to make sure that it increases after each
    # iteration.
    trainll = []
    for iter in xrange(5):
        g.fit(train_obs, n_iter=1, params=params, init_params='',
              min_covar=1e-1)
        trainll.append(g.score(train_obs).sum())

    # Note that the log likelihood will sometimes decrease by a
    # very small amount after it has more or less converged due to
    # the addition of min_covar to the covariance (to prevent
    # underflow).  This is why the threshold is set to -0.5
    # instead of 0.
    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def test_GMM_attributes():
    n_states, n_features = 10, 4
    cvtype = 'diag'
    g = mixture.GMM(n_states, cvtype)
    weights = np.random.rand(n_states)
    weights = weights / weights.sum()
    means = np.random.randint(-20, 20, (n_states, n_features))

    assert g.n_states == n_states
    assert g.cvtype == cvtype

    g.weights = weights
    assert_array_almost_equal(g.weights, weights)
    assert_raises(ValueError, g.__setattr__, 'weights', 2 * weights)
    assert_raises(ValueError, g.__setattr__, 'weights', [])
    assert_raises(ValueError, g.__setattr__, 'weights',
                  np.zeros((n_states - 2, n_features)))

    g.means = means
    assert_array_almost_equal(g.means, means)
    assert_raises(ValueError, g.__setattr__, 'means', [])
    assert_raises(ValueError, g.__setattr__, 'means',
                  np.zeros((n_states - 2, n_features)))

    covars = (0.1 + 2 * np.random.rand(n_states, n_features)) ** 2
    g._covars = covars
    assert_array_almost_equal(g._covars, covars)
    assert_raises(ValueError, g.__setattr__, 'covars', [])
    assert_raises(ValueError, g.__setattr__, 'covars',
                  np.zeros((n_states - 2, n_features)))

    assert_raises(ValueError, mixture.GMM, n_states=20, cvtype='badcvtype')
def test_rvs(self, n=100):
    g = mixture.GMM(self.n_states, self.cvtype)
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    g.means = 20 * self.means
    g._covars = np.maximum(self.covars[self.cvtype], 0.1)
    g.weights = self.weights

    samples = g.rvs(n)
    self.assertEquals(samples.shape, (n, self.n_features))
def convert_type_db_gmm(u):
    """
    Given compressed numpy arrays that specify a GMM from the database,
    convert back to type scikits.learn.GMM.
    """
    world = mixture.GMM(n_states=np.size(u.ubm_means, 0))
    world.means = u.ubm_means
    # The database stores full covariance matrices; keep only the
    # diagonal of each, since the model uses diagonal covariances.
    vars_diag = np.zeros((np.size(u.ubm_vars, 0), np.size(u.ubm_vars, 1)),
                         dtype=np.float)
    for i in range(np.size(vars_diag, 0)):
        vars_diag[i, :] = np.diag(u.ubm_vars[i])
    world.covars = vars_diag
    world.weights = np.reshape(u.ubm_weights, (np.size(u.ubm_weights, 0),))
    return world
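# A hedged sketch of the record shape convert_type_db_gmm() expects.
# _DummyUBM is a hypothetical stand-in for the real database row: full
# per-mixture covariance matrices plus means and weights.
class _DummyUBM(object):
    def __init__(self, n_mix, n_feat):
        self.ubm_means = np.random.randn(n_mix, n_feat)
        # full covariance matrices, as stored in the database
        self.ubm_vars = np.array([np.eye(n_feat) for i in range(n_mix)])
        self.ubm_weights = np.ones((n_mix, 1)) / n_mix

# world = convert_type_db_gmm(_DummyUBM(8, 13))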
def test_eval(self):
    g = mixture.GMM(self.n_states, self.cvtype)
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    g.means = 20 * self.means
    g._covars = self.covars[self.cvtype]
    g.weights = self.weights

    gaussidx = np.repeat(range(self.n_states), 5)
    nobs = len(gaussidx)
    obs = np.random.randn(nobs, self.n_features) + g.means[gaussidx]

    ll, posteriors = g.eval(obs)

    self.assertEqual(len(ll), nobs)
    self.assertEqual(posteriors.shape, (nobs, self.n_states))
    assert_array_almost_equal(posteriors.sum(axis=1), np.ones(nobs))
    assert_array_equal(posteriors.argmax(axis=1), gaussidx)
def create_random_gmm(n_mix, n_features, cvtype):
    from scikits.learn import mixture
    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = np.random.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * np.random.rand(n_mix)) ** 2,
        'tied': (_generate_random_spd_matrix(n_features)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * np.random.rand(n_mix, n_features)) ** 2,
        'full': np.array(
            [_generate_random_spd_matrix(n_features)
             + mincv * np.eye(n_features) for x in xrange(n_mix)]),
    }[cvtype]
    g.weights = hmm.normalize(np.random.rand(n_mix))
    return g
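# Smoke-test sketch for create_random_gmm(): sample from a random model
# and check the sample shape. Assumes the _generate_random_spd_matrix
# helper and the hmm module referenced above are importable, as in the
# surrounding test code.
def _demo_create_random_gmm():
    g = create_random_gmm(3, 4, 'diag')
    samples = g.rvs(50)
    assert samples.shape == (50, 4)
    return g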
def map_adapt_speaker(obs, world, r_m=16.0, r_w=16.0):
    """
    MAP-adapt the weights and means of a world (UBM) model to one
    speaker's observations, using relevance factors r_w and r_m for
    the weight and mean updates respectively.
    """
    # Catch fatal errors
    assert isinstance(world, type(mixture.GMM()))
    assert np.size(obs, 1) == np.size(world.means, 1)
    assert np.size(obs, 0) > 1

    # Cast in case integer relevance factors were passed in
    r_m = float(r_m)
    r_w = float(r_w)

    # Posterior probabilities for X = {x_1, ..., x_T}:
    #   P(i|x_t) = w_i * p_i(x_t) / sum_{j=1}^{M} w_j * p_j(x_t)
    posterior = world.predict_proba(obs)
    n_i = np.sum(posterior, 0)

    # First-order sufficient statistics (posterior-weighted means)
    E_i = np.zeros((world.n_states, np.size(obs, 1)), dtype=np.float)
    for i in range(world.n_states):
        post_i = np.reshape(posterior[:, i], (np.size(obs, 0), 1))
        E_i[i, :] = np.sum(post_i * obs, 0) / n_i[i]

    # Data-dependent adaptation coefficients
    a_i_mean = np.reshape(n_i / (n_i + r_m), (world.n_states, 1))
    a_i_weight = n_i / (n_i + r_w)
    T = float(np.size(obs, 0))

    # Interpolate the new statistics with the world model's parameters
    w_hat = a_i_weight * n_i / T + (1.0 - a_i_weight) * world.weights
    w_hat = w_hat / np.sum(w_hat)
    u_hat = a_i_mean * E_i + (1.0 - a_i_mean) * world.means
    return w_hat, u_hat
def train_gmm(obs, world=None, params={'num_mixtures': 1024}):
    """
    Trains a GMM with N mixtures from the given observations.

    Usage: world = train_gmm(obs, world, params)

    Inputs:
        obs    -- MxN Numpy array of observations (M observations to be
                  clustered in N-dimensional space)
        world  -- Previous world model to use for initialization (if not
                  supplied, will initialize automagically)
        params -- {'num_mixtures': X} Use X mixtures in the model;
                  defaults to 1024

    Outputs:
        world -- GMM class output (contains means, weights, covars)
    """
    if log(params['num_mixtures'], 2) % 1:
        logging.warn('Number of mixtures is not a power of 2; '
                     'training will be slow.')
    if not world:
        world = mixture.GMM(n_states=params['num_mixtures'])
        world.fit(obs)
    else:
        # Keep the existing parameters as the starting point;
        # do not re-initialize before running EM.
        world.fit(obs, init_params='')
    return world
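# Hedged end-to-end sketch tying train_gmm() and map_adapt_speaker()
# together: train a small world model on pooled background data, then
# MAP-adapt it to one speaker. All sizes and data are toy values chosen
# for illustration; _demo_map_adapt is not part of the original code.
def _demo_map_adapt():
    ubm_obs = np.random.randn(500, 13)            # pooled background data
    speaker_obs = np.random.randn(200, 13) + 1.0  # one speaker's features
    world = train_gmm(ubm_obs, params={'num_mixtures': 4})
    w_hat, u_hat = map_adapt_speaker(speaker_obs, world)
    # w_hat: adapted mixture weights; u_hat: adapted component means
    return w_hat, u_hat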
features = shapeAnalysis(mask)
features += colorAnalysis(hsv, mask)
#features += featurePoints(greyscaleImg, mask)
print "features : ", features
nbFeatures = len(features)

n, m = 100, 2

# generate random sample, two components
np.random.seed(0)
C = np.random.randn(n, nbFeatures)
X = np.r_[np.dot(np.random.randn(nbFeatures, n), C),
          np.random.randn(n, nbFeatures) + C]

clf = mixture.GMM(n_states=10, cvtype='tied')
clf.fit(X)
print clf.decode(X)

splot = pl.subplot(111, aspect='equal')
color_iter = itertools.cycle(['r', 'g', 'b', 'c'])

Y_ = clf.predict(X)
for i, (mean, covar, color) in enumerate(zip(clf.means, clf.covars,
                                             color_iter)):
    v, w = np.linalg.eigh(covar)
    u = w[0] / np.linalg.norm(w[0])
    pl.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
    angle = np.arctan(u[1] / u[0])
    angle = 180 * angle / np.pi  # convert to degrees
    # draw the covariance ellipse in the first two feature dimensions
    ell = mpl.patches.Ellipse(mean[:2], v[0], v[1], 180 + angle,
                              color=color)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)

pl.show()
import numpy as np
from scikits.learn import mixture
import itertools
import pylab as pl
import matplotlib as mpl

n, m = 300, 2

# generate random sample, two components
np.random.seed(0)
C = np.array([[0., -0.7], [3.5, .7]])
X = np.r_[np.dot(np.random.randn(n, 2), C),
          np.random.randn(n, 2) + np.array([3, 3])]

clf = mixture.GMM(n_states=2, cvtype='full')
clf.fit(X)

splot = pl.subplot(111, aspect='equal')
color_iter = itertools.cycle(['r', 'g', 'b', 'c'])

Y_ = clf.predict(X)
for i, (mean, covar, color) in enumerate(zip(clf.means, clf.covars,
                                             color_iter)):
    v, w = np.linalg.eigh(covar)
    u = w[0] / np.linalg.norm(w[0])
    pl.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
    angle = np.arctan(u[1] / u[0])
    angle = 180 * angle / np.pi  # convert to degrees
    ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)

pl.show()
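# A brief note on the plotting loop above: np.linalg.eigh decomposes
# each covariance matrix into eigenvalues v (which set the ellipse's
# axis lengths) and eigenvectors w (whose leading vector sets its
# orientation); the arctan converts that direction into the rotation
# angle, in degrees, that mpl.patches.Ellipse expects.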