def test_negative_binomial(self):
    """
    Test the NB log-likelihood (nb_ll) and NB clustering (nb_cluster).

    Simulates data from three negative binomial components with
    simulation.generate_nb_data(P, R, 100), then checks that:

    * nb_ll returns a (100, 3) array with no NaN or infinite entries;
    * nb_cluster(data, 3) returns p and r with the same shapes as the
      generating P and R, free of NaNs;
    * the assignments are not all identical and reach a purity above 0.8
      against the simulated labels.
    """
    P = np.array([[0.5, 0.4, 0.8],
                  [0.5, 0.3, 0.7],
                  [0.5, 0.3, 0.9]])
    R = np.array([[1., 8., 10.],
                  [2., 8., 24],
                  [3., 6., 30.]])
    data, labels = simulation.generate_nb_data(P, R, 100)
    data = data.astype(float)

    # Log-likelihood: one row per sample, one column per component.
    ll = nb_ll(data, P, R)
    self.assertEqual(ll.shape, (100, 3))
    self.assertFalse(np.isnan(ll).any())
    self.assertFalse(np.isinf(ll).any())

    # TODO: the derivative of the log-likelihood is not tested yet.

    # Clustering. The fitted parameters are only checked for shape and
    # validity, because the estimates themselves are often not close to
    # the generating values.
    a, p, r = nb_cluster(data, 3)
    self.assertEqual(p.shape, P.shape)
    self.assertEqual(r.shape, R.shape)
    self.assertFalse(np.isnan(p).any())
    self.assertFalse(np.isnan(r).any())

    # Assert that all the points aren't being put into the same cluster.
    self.assertFalse((a == a[0]).all())

    # Predicted assignments first, reference labels second -- the same
    # argument order as the other purity checks in these tests.
    # NOTE(review): the original call was purity(labels, a); confirm the
    # argument order against purity's definition.
    self.assertGreater(purity(a, labels), 0.8)
def test_cluster(self):
    """
    Test uncurl.poisson_cluster with 2 clusters on the fixture data.

    Uses self.data and self.labs, which are set up outside this method.
    Checks the output dimensions, that the centers contain no NaNs, and
    that the assignments reach a purity above 0.8 against self.labs.
    """
    data = self.data
    assignments, centers = uncurl.poisson_cluster(data, 2)

    # One assignment per column of data, one center row per row of data.
    self.assertEqual(assignments.shape[0], data.shape[1])
    self.assertEqual(centers.shape[0], data.shape[0])

    # Just checking that the values are valid.
    self.assertFalse(np.isnan(centers).any())

    # assertGreater reports the actual purity when the check fails.
    self.assertGreater(purity(assignments, self.labs), 0.8)
def test_simulation(self):
    """
    Test that Poisson clustering separates clusters in simulated data.

    Generates data with generate_poisson_data(centers, 500) from three
    known centers, converts it to a sparse CSC matrix, clusters it with
    uncurl.poisson_cluster(data, 3), and requires a purity above 0.65
    against the simulated labels.
    """
    centers = np.array([[1, 10, 20],
                        [1, 11, 1],
                        [50, 1, 100]])
    centers = centers.astype(float)
    data, labs = generate_poisson_data(centers, 500)
    data = data.astype(float)
    # The clustering is run on the sparse representation of the data.
    data = sparse.csc_matrix(data)
    assignments, c_centers = uncurl.poisson_cluster(data, 3)

    # Poisson distance between every true center (column i) and every
    # estimated center (column j).
    # NOTE(review): this matrix is filled but never asserted on.
    distances = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            distances[i, j] = uncurl.poisson_dist(centers[:, i],
                                                  c_centers[:, j])

    # Compute the score once and reuse it for the diagnostic output and
    # for the assertion.
    score = purity(assignments, labs)
    print(assignments)
    print(labs)
    print(score)
    self.assertGreater(score, 0.65)
def test_zip_simulation(self):
    """
    Test ZIP clustering on Poisson-simulated data.

    Generates data with generate_poisson_data(centers, 500), clusters it
    with uncurl.zip_cluster(data, 3), and requires a purity above 0.8
    against the simulated labels.
    """
    centers = np.array([[0.1, 10, 20],
                        [0.1, 11, 0.1],
                        [50, 0.1, 100]])
    centers = centers.astype(float)
    data, labs = generate_poisson_data(centers, 500)
    data = data.astype(float)

    # Only the assignments are checked; the fitted centers and the third
    # return value are not inspected here.
    assignments, c_centers, c_zeros = uncurl.zip_cluster(data, 3)

    # assertGreater reports the actual purity when the check fails.
    self.assertGreater(purity(assignments, labs), 0.8)
def test_zip_simulation_2(self):
    """
    Test ZIP clustering on ZIP-simulated data.

    Draws a random 3x3 integer matrix of centers in [10, 1000) and a
    random 3x3 matrix L in [0, 1), generates data with
    generate_zip_data(centers, L, 1000), clusters it with
    uncurl.zip_cluster(data, 3), and requires a purity above 0.6 against
    the simulated labels.

    NOTE(review): the parameters are drawn from np.random without a fixed
    seed, so the inputs differ from run to run.
    """
    centers = np.random.randint(10, 1000, (3, 3))
    L = np.random.random((3, 3))
    print(centers)
    print(L)
    centers = centers.astype(float)
    data, labs = generate_zip_data(centers, L, 1000)
    data = data.astype(float)
    print(data)
    assignments, c_centers, c_zeros = uncurl.zip_cluster(data, 3)

    # Poisson distance between every true center (column i) and every
    # estimated center (column j).
    # NOTE(review): this matrix is filled but never asserted on.
    distances = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            distances[i, j] = uncurl.poisson_dist(centers[:, i],
                                                  c_centers[:, j])
    print(c_centers)
    print(c_zeros)

    # Compute the score once and reuse it for the diagnostic output and
    # for the assertion.
    score = purity(assignments, labs)
    print(score)
    self.assertGreater(score, 0.6)
def test_random_1(self):
    """
    Test NB state estimation with random parameters.

    Generates M, W and R with simulation.generate_nb_states(2, 200, 20)
    and data with simulation.generate_nb_state_data(M, W, R). The
    estimator is started from a noised copy of M and given the generating
    R. The argmax over axis 0 of the estimated weights must agree with
    the argmax of the true weights with a purity above 0.7.
    """
    M, W, R = simulation.generate_nb_states(2, 200, 20)
    data = simulation.generate_nb_state_data(M, W, R)

    # Perturb each entry of the true means by a uniform offset in
    # [-0.05, 0.05) to build the initial means.
    M_noised = M + 0.1 * (np.random.random(M.shape) - 0.5)
    M_, W_, R_, ll = nb_state_estimation.nb_estimate_state(
        data, 2, init_means=M_noised, R=R, disp=False)

    # Hard labels: index of the largest weight along axis 0.
    c1 = W.argmax(0)
    c2 = W_.argmax(0)
    p = purity(c2, c1)
    print(p)
    print(data)
    print(M)
    print(M_)

    # assertGreater reports the actual purity when the check fails.
    self.assertGreater(p, 0.7)
if __name__ == '__main__':
    # Demo script: load the SCDE test data set, run Poisson, NB and ZIP
    # clustering on it, print the purity of each result against the
    # provided labels, then run Poisson state estimation and plot a 2-D
    # projection of the estimated weights.
    dat = loadmat('data/SCDE_test.mat')
    data = dat['dat'].toarray()

    # k-means++ centers, used below to initialise Poisson clustering.
    centers, assignments = uncurl.kmeans_pp(data, 2)
    lls = uncurl.poisson_ll(data, centers)

    # Poisson clustering
    assignments_poisson, centers = uncurl.poisson_cluster(data, 2,
                                                          init=centers)
    # NB clustering
    assignments_nb, P, R = uncurl.nb_cluster(data, 2)
    # ZIP clustering
    assignments_zip, M, L = uncurl.zip_cluster(data, 2)

    true_labs = dat['Lab'][0]
    # print() is called with a single pre-formatted string so that the
    # output is the same under Python 2 and Python 3.
    print('poisson purity: {}'.format(purity(assignments_poisson, true_labs)))
    print('NB purity: {}'.format(purity(assignments_nb, true_labs)))
    print('ZIP purity: {}'.format(purity(assignments_zip, true_labs)))

    # State estimation
    means, weights, ll = uncurl.poisson_estimate_state(data, 2, disp=False)
    w_classes = weights.argmax(0)
    print('W argmax purity: {}'.format(purity(w_classes, true_labs)))

    # dimensionality reduction
    X = uncurl.dim_reduce(means, weights, 2)
    proj = np.dot(X, weights)

    # plotting dimensionality reduction
    plt.cla()
    # weight plot: points are coloured by the first row of the weights
    plt.title('Dimensionality reduction plot - assigned weight labels')
    plt.scatter(proj[0, :], proj[1, :], s=100, cmap='seismic',
                c=weights[0, :])
    plt.xlabel('dim 1')