예제 #1
0
    def test_lmvnpdftied_consistent_with_lmvnpdffull(self):
        # Scoring with one covariance shared by all states ("tied") must
        # agree with "full" scoring where every state is handed its own
        # copy of that same covariance.
        n_states, n_dim, n_obs = 4, 20, 200

        # Random draws are made in this order: means, covariance, data.
        means = np.random.randint(10) * np.random.rand(n_states, n_dim)
        shared_cov = _generate_random_spd_matrix(n_dim)
        samples = np.random.randint(10) * np.random.rand(n_obs, n_dim)

        # One copy of the shared covariance per state, stacked along a new
        # leading axis.
        per_state_cov = np.tile(shared_cov, (n_states, 1, 1))

        expected = gmm.lmvnpdf(samples, means, per_state_cov, "full")
        actual = gmm.lmvnpdf(samples, means, shared_cov, "tied")
        assert_array_almost_equal(actual, expected)
예제 #2
0
 def clustering(self,data,mode="kmeans",itermax=100,threshold=1.0e-2):
   """
   Cluster data with either k-means or a Gaussian mixture model.
   
   input 
     data : 2d-array, data to be clustered
     mode : string, clustering method, "kmeans" or "gmm"
     itermax : int, max cycle of iteration in gmm train
     threshold : float, gmm iteration is converged if changes are below it
                 (forwarded to the gmm fit as ``thresh``)
     
   return
     kmeans mode : (codes, score)
     gmm mode    : (codes, score, iter_log, aic, bic)
     
     codes : array, codes[i] is the corresponding cluster of data[i]
     score : array, in kmeans the distance of each datum to its assigned
             centroid; in gmm the value of gmm.lmvnpdf for each datum at
             its assigned cluster
             NOTE(review): lmvnpdf presumably returns log-densities, so
             in gmm mode these would be log values -- confirm.
     iter_log : array, log of iteration (only gmm)
     aic : float, AIC of the model (only gmm)
     bic : float, BIC of the model (only gmm)
     
   side effects
     sets self.sort_order (False for kmeans, True for gmm) and, in gmm
     mode, stores the fitted model in self.gm
     
   raise
     ClusteringModeError : if mode is neither "kmeans" nor "gmm"
   """
   
   # call form prints the same text under Python 2 and Python 3
   print("running %s mode" % mode)
   
   if mode == "kmeans": # run kmeans mode
     self.sort_order = False # ascending order
     
     # get centroids (the second value returned by kmeans is not used)
     centroids, _ = kmeans(data,self.nclust)
     
     # assign data to the nearest cluster
     codes, distances = vq(data,centroids)
     
     return codes,distances
   
   elif mode == "gmm": # run gmm mode
     
     self.sort_order = True # descending order
     
     # construct a GMM instance
     self.gm = gmm.GMM(self.nclust,cvtype="full")
     
     # train gmm 
     iter_log = self.gm.fit(data,itermax,thresh=threshold)
     # a log as long as the iteration cap is treated as "did not converge"
     if len(iter_log) == itermax :
       stderr.write("warning!! EM step not converged\n")
     
     # assign data to the nearest cluster (first returned value is unused)
     _, codes = self.gm.decode(data)
     
     # evaluate every datum under every cluster, then keep, for each row,
     # only the column of the cluster that datum was assigned to
     lpr = gmm.lmvnpdf(data,self.gm.means,
                       self.gm._covars,self.gm._cvtype)
     probs = lpr[np.arange(len(lpr)),codes]
     
     # calc AIC and BIC for model evaluation
     aic = AIC(iter_log[-1],data,self.nclust)
     bic = BIC(iter_log[-1],data,self.nclust)
     
     return codes,probs,iter_log,aic,bic
   
   else:
     raise ClusteringModeError(mode)
예제 #3
0
    def _test_lmvnpdfspherical(self, ndim, nstates, nobs=100):
        # Check gmm.lmvnpdf with "spherical" covariances against the slow
        # reference implementation given the equivalent per-dimension
        # variances.
        #
        # ndim    -- number of columns in the means and observations
        # nstates -- number of rows in the means (one variance per state)
        # nobs    -- number of observation rows to draw
        mu = np.random.randint(10) * np.random.rand(nstates, ndim)
        spherecv = np.random.rand(nstates, 1) ** 2 + 1
        obs = np.random.randint(10) * np.random.rand(nobs, ndim)

        # Repeat each state's single variance across all ndim columns so the
        # reference receives an (nstates, ndim) array, the same layout
        # _test_lmvnpdfdiag passes to _slow_lmvnpdfdiag.  Tiling with
        # (ndim, 1) would instead stack the column vertically into an
        # (ndim * nstates, 1) array.
        cv = np.tile(spherecv, (1, ndim))
        reference = self._slow_lmvnpdfdiag(obs, mu, cv)
        lpr = gmm.lmvnpdf(obs, mu, spherecv, "spherical")
        assert_array_almost_equal(lpr, reference)
예제 #4
0
    def _test_lmvnpdftied_with_diagonal_covariance(self, ndim, nstates, nobs=100):
        # A "tied" model whose shared covariance is a diagonal matrix must
        # match the slow diagonal reference in which every state uses that
        # same diagonal.
        means = np.random.randint(10) * np.random.rand(nstates, ndim)
        shared_diag = (np.random.rand(ndim) + 1.0) ** 2
        samples = np.random.randint(10) * np.random.rand(nobs, ndim)

        # Reference input: the shared diagonal repeated once per state.
        per_state_diag = np.tile(shared_diag, (nstates, 1))
        expected = self._slow_lmvnpdfdiag(samples, means, per_state_diag)

        # Vectorized input: the diagonal expanded to a square matrix.
        shared_cov = np.diag(shared_diag)
        actual = gmm.lmvnpdf(samples, means, shared_cov, "tied")

        assert_array_almost_equal(actual, expected)
예제 #5
0
    def _test_lmvnpdffull_with_diagonal_covariance(self, ndim, nstates, nobs=100):
        # "full" covariances that happen to be diagonal must score the same
        # as the slow diagonal reference given just the diagonals.
        means = np.random.randint(10) * np.random.rand(nstates, ndim)
        diag_vars = (np.random.rand(nstates, ndim) + 1.0) ** 2
        samples = np.random.randint(10) * np.random.rand(nobs, ndim)

        # Expand each state's row of variances into a square diagonal matrix.
        full_covs = np.array(list(map(np.diag, diag_vars)))

        expected = self._slow_lmvnpdfdiag(samples, means, diag_vars)
        actual = gmm.lmvnpdf(samples, means, full_covs, "full")
        assert_array_almost_equal(actual, expected)
예제 #6
0
def test_lmvnpdf_spherical():
    """
    compare gmm.lmvnpdf with 'spherical' covariances against the naive
    diagonal implementation given the equivalent per-dimension variances
    """
    n_dim, n_states, n_obs = 2, 3, 10

    mu = np.random.randint(10) * np.random.rand(n_states, n_dim)
    spherecv = np.random.rand(n_states, 1) ** 2 + 1
    obs = np.random.randint(10) * np.random.rand(n_obs, n_dim)

    # Repeat each state's single variance across all n_dim columns so the
    # reference receives an (n_states, n_dim) array, the same layout
    # test_lmvnpdf_diag passes to _naive_lmvnpdf_diag.  Tiling with
    # (n_dim, 1) would instead stack the column vertically into an
    # (n_dim * n_states, 1) array.
    cv = np.tile(spherecv, (1, n_dim))
    reference = _naive_lmvnpdf_diag(obs, mu, cv)
    lpr = gmm.lmvnpdf(obs, mu, spherecv, 'spherical')
    assert_array_almost_equal(lpr, reference)
예제 #7
0
    def _test_lmvnpdfdiag(self, ndim, nstates, nobs=100):
        # Correctness check: the vectorized gmm.lmvnpdf in "diag" mode must
        # reproduce what the slow, naive implementation computes for the
        # same random means, variances and observations.
        means = np.random.randint(10) * np.random.rand(nstates, ndim)
        diag_vars = (np.random.rand(nstates, ndim) + 1.0) ** 2
        samples = np.random.randint(10) * np.random.rand(nobs, ndim)

        expected = self._slow_lmvnpdfdiag(samples, means, diag_vars)
        actual = gmm.lmvnpdf(samples, means, diag_vars, "diag")

        assert_array_almost_equal(actual, expected)
예제 #8
0
def test_lmvnpdf_full():
    """
    compare gmm.lmvnpdf with 'full' covariances that are diagonal
    matrices against the naive diagonal implementation
    """
    n_dim, n_states, n_obs = 2, 3, 10

    means = np.random.randint(10) * np.random.rand(n_states, n_dim)
    diag_vars = (np.random.rand(n_states, n_dim) + 1.0) ** 2
    samples = np.random.randint(10) * np.random.rand(n_obs, n_dim)

    # Expand each state's row of variances into a square diagonal matrix.
    full_covs = np.array(list(map(np.diag, diag_vars)))

    expected = _naive_lmvnpdf_diag(samples, means, diag_vars)
    actual = gmm.lmvnpdf(samples, means, full_covs, 'full')
    assert_array_almost_equal(actual, expected)
예제 #9
0
def test_lmvnpdf_diag():
    """
    correctness check for the vectorized gmm.lmvnpdf in 'diag' mode:
    its output must match a slow and naive implementation run on the
    same random inputs
    """
    n_dim, n_states, n_obs = 2, 3, 10

    means = np.random.randint(10) * np.random.rand(n_states, n_dim)
    diag_vars = (np.random.rand(n_states, n_dim) + 1.0) ** 2
    samples = np.random.randint(10) * np.random.rand(n_obs, n_dim)

    expected = _naive_lmvnpdf_diag(samples, means, diag_vars)
    actual = gmm.lmvnpdf(samples, means, diag_vars, 'diag')

    assert_array_almost_equal(actual, expected)