def test_lmvnpdftied_consistent_with_lmvnpdffull(self):
    """Check that the "tied" and "full" modes of gmm.lmvnpdf agree.

    A single matrix from _generate_random_spd_matrix is handed to the
    "tied" mode as-is and stacked once per state for the "full" mode;
    both calls must produce almost-equal output.
    """
    n_components, n_features, n_samples = 4, 20, 200

    # Random draws are kept in the order means -> covariance -> observations
    # so that a seeded run consumes the generator identically.
    means = np.random.randint(10) * np.random.rand(n_components, n_features)
    shared_cov = _generate_random_spd_matrix(n_features)
    samples = np.random.randint(10) * np.random.rand(n_samples, n_features)

    # One copy of the shared matrix per state for the "full" call.
    stacked_cov = np.tile(shared_cov, (n_components, 1, 1))

    expected = gmm.lmvnpdf(samples, means, stacked_cov, "full")
    actual = gmm.lmvnpdf(samples, means, shared_cov, "tied")
    assert_array_almost_equal(actual, expected)
def clustering(self, data, mode="kmeans", itermax=100, threshold=1.0e-2):
    """
    Cluster data with k-means or with a Gaussian mixture model.

    input
      data      : 2d-array, data to be clustered
      mode      : string, clustering method, "kmeans" or "gmm"
      itermax   : int, max cycle of iteration in gmm train
      threshold : float, gmm iteration is converged if changes are below it
    return
      mode == "kmeans" : (codes, distances)
      mode == "gmm"    : (codes, probs, iter_log, aic, bic)
        codes     : array, codes[i] is the corresponding cluster of data[i]
        distances : array, second value returned by vq for each datum
        probs     : array, probs[i] is the gmm.lmvnpdf entry of data[i] for
                    its assigned cluster (presumably a log-density -- confirm
                    against gmm.lmvnpdf)
        iter_log  : value returned by self.gm.fit, log of iteration
        aic       : AIC of the model
        bic       : BIC of the model
    raise
      ClusteringModeError : mode is neither "kmeans" nor "gmm"
    side effects
      sets self.sort_order (False for kmeans, True for gmm); in gmm mode
      also stores the fitted model in self.gm
    """
    # Single-argument call form: prints the same text on Python 2 and 3.
    print("running %s mode" % mode)

    if mode == "kmeans":
        self.sort_order = False  # ascending order

        # get centroids (the second return value of kmeans is not needed)
        centroids, _ = kmeans(data, self.nclust)

        # assign data to the nearest cluster
        codes, distances = vq(data, centroids)
        return codes, distances

    if mode == "gmm":
        self.sort_order = True  # descending order

        # construct a GMM instance and train it
        self.gm = gmm.GMM(self.nclust, cvtype="full")
        iter_log = self.gm.fit(data, itermax, thresh=threshold)
        # a log as long as the iteration cap is taken to mean EM never
        # met the threshold
        if len(iter_log) == itermax:
            stderr.write("warning!! EM step not converged\n")

        # assign data to the nearest cluster
        _, codes = self.gm.decode(data)

        # per-cluster values for every datum; keep only the entry of the
        # cluster each datum was assigned to (row i, column codes[i])
        lpr = gmm.lmvnpdf(data, self.gm.means,
                          self.gm._covars, self.gm._cvtype)
        probs = lpr[np.arange(len(lpr)), codes]

        # calc AIC and BIC for model evaluation
        aic = AIC(iter_log[-1], data, self.nclust)
        bic = BIC(iter_log[-1], data, self.nclust)
        return codes, probs, iter_log, aic, bic

    raise ClusteringModeError(mode)
def _test_lmvnpdfspherical(self, ndim, nstates, nobs=100):
    """Compare gmm.lmvnpdf in "spherical" mode with the slow reference.

    ndim    : number of columns of the random means and observations
    nstates : number of rows of the random means / variances
    nobs    : number of random observations (default 100)

    Raises AssertionError (via assert_array_almost_equal) on mismatch.
    """
    mu = np.random.randint(10) * np.random.rand(nstates, ndim)
    spherecv = np.random.rand(nstates, 1) ** 2 + 1
    obs = np.random.randint(10) * np.random.rand(nobs, ndim)

    # Repeat each state's single variance across all ndim columns so the
    # reference receives an (nstates, ndim) array, the same layout the
    # diagonal-covariance tests pass to _slow_lmvnpdfdiag.  Tiling with
    # (ndim, 1) instead stacks rows and yields (nstates * ndim, 1).
    cv = np.tile(spherecv, (1, ndim))

    reference = self._slow_lmvnpdfdiag(obs, mu, cv)
    lpr = gmm.lmvnpdf(obs, mu, spherecv, "spherical")
    assert_array_almost_equal(lpr, reference)
def _test_lmvnpdftied_with_diagonal_covariance(self, ndim, nstates, nobs=100):
    """Compare gmm.lmvnpdf in "tied" mode with the slow diagonal reference.

    A single random vector of variances is shared by all states: the
    reference gets it repeated once per state, while gmm.lmvnpdf gets it
    as a diagonal matrix.
    """
    # Draw order (means, variances, observations) matches the generator
    # consumption of a seeded run.
    means = np.random.randint(10) * np.random.rand(nstates, ndim)
    shared_var = (np.random.rand(ndim) + 1.0) ** 2
    samples = np.random.randint(10) * np.random.rand(nobs, ndim)

    per_state_var = np.tile(shared_var, (nstates, 1))
    expected = self._slow_lmvnpdfdiag(samples, means, per_state_var)

    tied_matrix = np.diag(shared_var)
    actual = gmm.lmvnpdf(samples, means, tied_matrix, "tied")
    assert_array_almost_equal(actual, expected)
def _test_lmvnpdffull_with_diagonal_covariance(self, ndim, nstates, nobs=100):
    """Compare gmm.lmvnpdf in "full" mode with the slow diagonal reference.

    Each state's random variance row is turned into a diagonal matrix for
    the "full" call; the reference consumes the rows directly.
    """
    # Draw order (means, variances, observations) matches the generator
    # consumption of a seeded run.
    means = np.random.randint(10) * np.random.rand(nstates, ndim)
    variances = (np.random.rand(nstates, ndim) + 1.0) ** 2
    samples = np.random.randint(10) * np.random.rand(nobs, ndim)

    # One diagonal matrix per state, stacked along the first axis.
    diag_matrices = np.array([np.diag(row) for row in variances])

    expected = self._slow_lmvnpdfdiag(samples, means, variances)
    actual = gmm.lmvnpdf(samples, means, diag_matrices, "full")
    assert_array_almost_equal(actual, expected)
def test_lmvnpdf_spherical():
    """Compare gmm.lmvnpdf in 'spherical' mode with the naive reference.

    Raises AssertionError (via assert_array_almost_equal) on mismatch.
    """
    n_dim, n_states, n_obs = 2, 3, 10

    mu = np.random.randint(10) * np.random.rand(n_states, n_dim)
    spherecv = np.random.rand(n_states, 1) ** 2 + 1
    obs = np.random.randint(10) * np.random.rand(n_obs, n_dim)

    # Repeat each state's single variance across all n_dim columns so the
    # reference receives an (n_states, n_dim) array, the same layout the
    # diag and full tests pass to _naive_lmvnpdf_diag.  Tiling with
    # (n_dim, 1) instead stacks rows and yields (n_states * n_dim, 1).
    cv = np.tile(spherecv, (1, n_dim))

    reference = _naive_lmvnpdf_diag(obs, mu, cv)
    lpr = gmm.lmvnpdf(obs, mu, spherecv, 'spherical')
    assert_array_almost_equal(lpr, reference)
def _test_lmvnpdfdiag(self, ndim, nstates, nobs=100):
    """Compare gmm.lmvnpdf in "diag" mode with the slow reference.

    The slow and naive implementation (self._slow_lmvnpdfdiag) serves as
    the reference against which the vectorized gmm.lmvnpdf is checked
    for correctness.
    """
    # Draw order (means, variances, observations) matches the generator
    # consumption of a seeded run.
    means = np.random.randint(10) * np.random.rand(nstates, ndim)
    variances = (np.random.rand(nstates, ndim) + 1.0) ** 2
    samples = np.random.randint(10) * np.random.rand(nobs, ndim)

    expected = self._slow_lmvnpdfdiag(samples, means, variances)
    actual = gmm.lmvnpdf(samples, means, variances, "diag")
    assert_array_almost_equal(actual, expected)
def test_lmvnpdf_full():
    """Compare gmm.lmvnpdf in 'full' mode with the naive diagonal reference.

    Each state's random variance row is turned into a diagonal matrix for
    the 'full' call; the reference consumes the rows directly.
    """
    n_features = 2
    n_components = 3
    n_samples = 10

    # Draw order (means, variances, observations) matches the generator
    # consumption of a seeded run.
    means = np.random.randint(10) * np.random.rand(n_components, n_features)
    variances = (np.random.rand(n_components, n_features) + 1.0) ** 2
    samples = np.random.randint(10) * np.random.rand(n_samples, n_features)

    # One diagonal matrix per state, stacked along the first axis.
    diag_matrices = np.array([np.diag(row) for row in variances])

    expected = _naive_lmvnpdf_diag(samples, means, variances)
    actual = gmm.lmvnpdf(samples, means, diag_matrices, 'full')
    assert_array_almost_equal(actual, expected)
def test_lmvnpdf_diag():
    """
    Check the vectorized gmm.lmvnpdf in 'diag' mode for correctness by
    comparing it with a slow and naive implementation of lmvnpdf.
    """
    n_features = 2
    n_components = 3
    n_samples = 10

    # Draw order (means, variances, observations) matches the generator
    # consumption of a seeded run.
    means = np.random.randint(10) * np.random.rand(n_components, n_features)
    variances = (np.random.rand(n_components, n_features) + 1.0) ** 2
    samples = np.random.randint(10) * np.random.rand(n_samples, n_features)

    expected = _naive_lmvnpdf_diag(samples, means, variances)
    actual = gmm.lmvnpdf(samples, means, variances, 'diag')
    assert_array_almost_equal(actual, expected)