Python purity Examples

Programming Language: Python

Namespace/Package Name: uncurl.evaluation

Method/Function: purity

Examples at hotexamples.com: 7

Python purity - 7 examples found. These are the top-rated real-world Python examples of uncurl.evaluation.purity extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

 def test_negative_binomial(self):
     """
     Test the NB log-likelihood (nb_ll) and NB clustering (nb_cluster)
     on data simulated from known P and R parameters.
     """
     P = np.array([[0.5,0.4,0.8],
                   [0.5,0.3,0.7],
                   [0.5,0.3,0.9]])
     R = np.array([[1.,8.,10.],
                   [2.,8.,24],
                   [3.,6.,30.]])
     data, labels = simulation.generate_nb_data(P, R, 100)
     data = data.astype(float)
     # the log-likelihood must have the expected shape and be finite
     ll = nb_ll(data, P, R)
     self.assertEqual(ll.shape, (100,3))
     self.assertFalse(np.isnan(ll).any())
     self.assertFalse(np.isinf(ll).any())
     # TODO: the derivative is not tested yet.
     # test nb cluster
     # how to test the results... they're often not good...
     a,p,r = nb_cluster(data,3)
     self.assertEqual(p.shape, P.shape)
     self.assertEqual(r.shape, R.shape)
     self.assertFalse(np.isnan(p).any())
     self.assertFalse(np.isnan(r).any())
     # NOTE(review): other call sites pass the computed assignments first
     # and the reference labels second; the order here is reversed.
     # Left unchanged -- confirm that it is intended.
     # assertGreater reports the actual purity value when the check fails.
     self.assertGreater(purity(labels, a), 0.8)
     # assert that all the points aren't being put into
     # the same cluster.
     self.assertFalse((a==a[0]).all())

Example #2

Show file

File: test_cluster_sparse.py Project: yjzhang/uncurl_python

 def test_cluster(self):
     """
     Check that Poisson clustering of self.data into 2 clusters returns
     consistently shaped, NaN-free output that agrees with self.labs.
     """
     data = self.data
     assignments, centers = uncurl.poisson_cluster(data, 2)
     # one assignment per column of data, one center row per row of data
     self.assertEqual(assignments.shape[0], data.shape[1])
     self.assertEqual(centers.shape[0], data.shape[0])
     # just checking that the values are valid
     self.assertFalse(np.isnan(centers).any())
     # assertGreater reports the actual purity value when the check fails
     self.assertGreater(purity(assignments, self.labs), 0.8)

Example #3

Show file

File: test_cluster_sparse.py Project: yjzhang/uncurl_python

 def test_simulation(self):
     """
     Basically this is to test that the Poisson EM can correctly separate
     clusters in simulated data (passed to the clustering as a sparse
     CSC matrix).
     """
     centers = np.array([[1,10,20], [1, 11, 1], [50, 1, 100]], dtype=float)
     data, labs = generate_poisson_data(centers, 500)
     data = sparse.csc_matrix(data.astype(float))
     assignments, c_centers = uncurl.poisson_cluster(data, 3)
     # Pairwise distances between the true and the estimated centers.
     # Nothing is asserted on them; the loop only exercises poisson_dist.
     distances = np.zeros((3,3))
     for i in range(3):
         for j in range(3):
             distances[i,j] = uncurl.poisson_dist(centers[:,i], c_centers[:,j])
     print(assignments)
     print(labs)
     # compute the score once and reuse it for the output and the check
     score = purity(assignments, labs)
     print(score)
     # assertGreater reports the actual purity value when the check fails
     self.assertGreater(score, 0.65)

Example #4

Show file

 def test_zip_simulation(self):
     """
     ZIP clustering on poisson-simulated data
     """
     centers = np.array([[0.1, 10, 20], [0.1, 11, 0.1], [50, 0.1, 100]],
                        dtype=float)
     data, labs = generate_poisson_data(centers, 500)
     data = data.astype(float)
     # only the assignments are checked; the other two outputs are unused
     assignments, _, _ = uncurl.zip_cluster(data, 3)
     # assertGreater reports the actual purity value when the check fails
     self.assertGreater(purity(assignments, labs), 0.8)

Example #5

Show file

 def test_zip_simulation_2(self):
     """
     ZIP clustering on ZIP-simulated data
     """
     # random integer centers in [10, 1000) and random L values in [0, 1)
     centers = np.random.randint(10, 1000, (3, 3))
     L = np.random.random((3, 3))
     print(centers)
     print(L)
     centers = centers.astype(float)
     data, labs = generate_zip_data(centers, L, 1000)
     data = data.astype(float)
     print(data)
     assignments, c_centers, c_zeros = uncurl.zip_cluster(data, 3)
     # Pairwise distances between the true and the estimated centers.
     # Nothing is asserted on them; the loop only exercises poisson_dist.
     distances = np.zeros((3, 3))
     for i in range(3):
         for j in range(3):
             distances[i, j] = uncurl.poisson_dist(centers[:, i],
                                                   c_centers[:, j])
     print(c_centers)
     print(c_zeros)
     # compute the score once and reuse it for the output and the check
     score = purity(assignments, labs)
     print(score)
     # assertGreater reports the actual purity value when the check fails
     self.assertGreater(score, 0.6)

Example #6

Show file

 def test_random_1(self):
     """
     Test NB state estimation with random parameters
     """
     M, W, R = simulation.generate_nb_states(2, 200, 20)
     data = simulation.generate_nb_state_data(M, W, R)
     # initialize from the true means plus uniform noise in [-0.05, 0.05)
     M_noised = M + 0.1*(np.random.random(M.shape)-0.5)
     M_, W_, R_, ll = nb_state_estimation.nb_estimate_state(
         data, 2, init_means=M_noised, R=R, disp=False)
     # compare the argmax over axis 0 of the true and estimated weights
     c1 = W.argmax(0)
     c2 = W_.argmax(0)
     p = purity(c2, c1)
     print(p)
     print(data)
     print(M)
     print(M_)
     # assertGreater reports the actual purity value when the check fails
     self.assertGreater(p, 0.7)

Example #7

Show file

# Script entry point: loads the SCDE test dataset, runs the uncurl clustering
# and state-estimation routines with 2 clusters, prints the purity of each
# result against the labels stored in the file, and starts a plot of the
# dimensionality-reduced data.
# NOTE: the print statements below use Python 2 syntax.
if __name__ == '__main__':
    dat = loadmat('data/SCDE_test.mat')
    # presumably dat['dat'] is a scipy sparse matrix; convert it to dense
    data = dat['dat'].toarray()
    # initial centers, reused below to initialize the Poisson clustering
    centers, assignments = uncurl.kmeans_pp(data, 2)
    lls = uncurl.poisson_ll(data, centers)
    # Poisson clustering
    assignments_poisson, centers = uncurl.poisson_cluster(data,
                                                          2,
                                                          init=centers)
    # NB clustering
    assignments_nb, P, R = uncurl.nb_cluster(data, 2)
    # ZIP clustering
    assignments_zip, M, L = uncurl.zip_cluster(data, 2)
    # reference labels: first row of the 'Lab' entry
    true_labs = dat['Lab'][0]
    print 'poisson purity:', purity(assignments_poisson, true_labs)
    print 'NB purity:', purity(assignments_nb, true_labs)
    print 'ZIP purity:', purity(assignments_zip, true_labs)
    # State estimation
    means, weights, ll = uncurl.poisson_estimate_state(data, 2, disp=False)
    # turn the weights into hard labels via the argmax over axis 0
    w_classes = weights.argmax(0)
    print 'W argmax purity:', purity(w_classes, true_labs)
    # dimensionality reduction
    X = uncurl.dim_reduce(means, weights, 2)
    # project the weights with X; rows 0 and 1 of the result are plotted
    proj = np.dot(X, weights)
    # plotting dimensionality reduction
    plt.cla()
    # weight plot: points are colored by the first row of the weights
    plt.title('Dimensionality reduction plot - assigned weight labels')
    plt.scatter(proj[0, :], proj[1, :], s=100, cmap='seismic', c=weights[0, :])
    plt.xlabel('dim 1')