def test_random_means_cluster_init(self):
    """
    Poisson state estimation on sparse input with 'cluster' initialization.

    Simulated data: 20 cells, 200 genes, 2 clusters. Verifies that the
    third return value (``ll``) equals the sparse objective at the
    returned estimates, that the sparse and dense objectives agree, and
    that means and weights are close to the simulated ones under some
    ordering of the two clusters.
    """
    true_means, true_weights = simulation.generate_poisson_states(2, 20, 200)
    counts = simulation.generate_state_data(true_means, true_weights)
    counts = sparse.csc_matrix(counts)

    m, w, ll = state_estimation.poisson_estimate_state(
        counts, 2, max_iters=10, disp=False, initialization='cluster')

    # The reported value must match the objective recomputed from the
    # raw CSC arrays of the same matrix.
    sparse_value = sparse_objective(
        counts.data, counts.indices, counts.indptr, 20, 200, m, w)
    self.assertEqual(ll, sparse_value)

    # Sparse and dense objective implementations should agree numerically.
    dense_value = objective(counts.toarray(), m, w)
    self.assertTrue(np.abs(sparse_value - dense_value) < 1e-6)

    # Cluster labels are arbitrary, so accept a match under either ordering.
    means_ok = False
    weights_ok = False
    for perm in itertools.permutations([0, 1]):
        if not means_ok:
            means_ok = np.mean(np.abs(true_means - m[:, perm])) < 20.0
        if not weights_ok:
            weights_ok = np.mean(np.abs(true_weights - w[perm, :])) < 0.2
    self.assertTrue(means_ok)
    self.assertTrue(weights_ok)
def test_pseudotime(self):
    """
    Test pseudotime calculations.

    Fits a lineage to slightly perturbed simulated means, computes
    pseudotime starting from cell 0, and checks that pseudotime is
    non-decreasing within each of three consecutive runs of 100 cells.
    """
    M, W = simulation.generate_poisson_lineage(3, 100, 50)
    # The simulated counts are not used below; the calls are kept so the
    # data-generation path is still exercised.
    sim_data = simulation.generate_state_data(M, W)
    sim_data = sim_data + 1e-8

    noisy_means = M + np.random.random(M.shape) - 0.5
    _curves, fitted_vals, edges, _assignments = lineage(noisy_means, W)
    ptime = pseudotime(0, edges, fitted_vals)

    # (first cell, one-past-last cell, require strictly positive ptime).
    # Presumably one run per lineage branch -- confirm against
    # generate_poisson_lineage. The first run contains the starting cell,
    # so it is the only one not required to be strictly positive.
    segments = (
        (0, 100, False),
        (100, 200, True),
        (200, 300, True),
    )
    for start, stop, require_positive in segments:
        previous = 0
        for cell in range(start, stop):
            current = ptime[cell]
            self.assertTrue(current >= previous)
            if require_positive:
                self.assertTrue(current > 0)
            previous = current
def test_random_means_lbfgs(self):
    """
    Test state estimation with random means and weights using the
    'L-BFGS-B' method on sparse input.

    200 cells, 20 genes, 2 clusters
    """
    sim_m, sim_w = simulation.generate_poisson_states(2, 200, 20)
    sim_data = simulation.generate_state_data(sim_m, sim_w)
    sim_data = sparse.csc_matrix(sim_data)
    m, w, ll = state_estimation.poisson_estimate_state(
        sim_data, 2, max_iters=10, disp=False, method='L-BFGS-B')

    # Each cell's weights must sum to 1. Use the absolute deviation so
    # that column sums well below 1 fail as well as sums above 1.
    self.assertLess(np.max(np.abs(w.sum(0) - 1.0)), 0.001)

    # Cluster labels are arbitrary, so accept a match under either ordering.
    cluster_orders = list(itertools.permutations([0, 1]))
    means_good = any(
        np.mean(np.abs(sim_m - m[:, p])) < 20.0 for p in cluster_orders)
    weights_good = any(
        np.mean(np.abs(sim_w - w[p, :])) < 0.3 for p in cluster_orders)
    self.assertTrue(means_good)
    self.assertTrue(weights_good)
def test_state_estimation(self):
    """
    Generate sample data from a small set to see that the state
    estimation is accurate.

    7 cells, 4 genes, 2 clusters
    """
    sim_means = np.array([[20., 30.],
                          [10., 3.],
                          [90., 50.],
                          [10., 4.]])
    sim_assignments = np.array([[0.1, 0.2, 0.3, 0.4, 0.5, 0.8, 0.9],
                                [0.9, 0.8, 0.7, 0.6, 0.5, 0.2, 0.1]])
    sim_data = simulation.generate_state_data(sim_means, sim_assignments)
    sim_data = sparse.csc_matrix(sim_data)
    print(sim_data)
    # add noise to the mean
    sim_means_noised = sim_means + 5 * (np.random.random(sim_means.shape) - 0.5)
    m, w, ll = state_estimation.poisson_estimate_state(
        sim_data, 2, init_means=sim_means_noised, max_iters=10, disp=False)
    print(m)
    print(w)

    # Each cell's weights must sum to 1. Use the absolute deviation so
    # that column sums well below 1 fail as well as sums above 1.
    self.assertLess(np.max(np.abs(w.sum(0) - 1.0)), 0.01)

    # mean error in M is less than 10, under either cluster ordering
    self.assertTrue(
        np.mean(np.abs(sim_means - m)) < 10.0
        or np.mean(np.abs(sim_means - m[:, [1, 0]])) < 10.0)
    # mean error in W is less than 0.3 (arbitrary boundary), under either
    # cluster ordering
    self.assertTrue(
        np.mean(np.abs(sim_assignments - w)) < 0.3
        or np.mean(np.abs(sim_assignments - w[[1, 0], :])) < 0.3)
def test_random_means_2(self):
    """
    Test zero-inflated (ZIP) state estimation with random means and
    weights, initialized from noised true means.

    20 cells, 200 genes, 2 clusters
    """
    # NOTE(review): another `test_random_means_2` (Poisson variant) is
    # visible in this file; if both are in the same class, the later
    # definition replaces this one and this test never runs -- confirm.
    sim_m, sim_w = simulation.generate_poisson_states(2, 20, 200)
    sim_data = simulation.generate_state_data(sim_m, sim_w)
    sim_means_noised = sim_m + 5 * (np.random.random(sim_m.shape) - 0.5)
    m, w, ll = zip_state_estimation.zip_estimate_state(
        sim_data, 2, init_means=sim_means_noised, max_iters=10, disp=False)

    # Each cell's weights must sum to 1. Use the absolute deviation so
    # that column sums well below 1 fail as well as sums above 1.
    self.assertLess(np.max(np.abs(w.sum(0) - 1.0)), 0.001)

    # No permutation search here: the estimates are compared to the
    # simulated values in the original cluster order only.
    self.assertLess(np.mean(np.abs(sim_m - m)), 60.0)
    self.assertLess(np.mean(np.abs(sim_w - w)), 0.5)
def test_lineage(self):
    """
    Testing lineage using randomly generated lineage data.

    Fits a lineage to slightly perturbed simulated means and checks that
    more than 150 of the returned edges join indices that differ by at
    most 1.
    """
    M, W = simulation.generate_poisson_lineage(3, 100, 50)
    # The simulated counts are not used below; the calls are kept so the
    # data-generation path is still exercised.
    sim_data = simulation.generate_state_data(M, W)
    sim_data = sim_data + 1e-8

    perturbed_means = M + np.random.random(M.shape) - 0.5
    _curves, _fitted_vals, edges, _assignments = lineage(perturbed_means, W)

    # TODO: assert something about the distances???
    print(len(edges))
    near_neighbor_edges = sum(
        1 for edge in edges if np.abs(edge[0] - edge[1]) <= 1)
    self.assertTrue(near_neighbor_edges > 150)
def test_dim_reduce(self):
    """
    Test dimensionality reduction using sample data.

    Checks the output shapes of ``dim_reduce`` and ``dim_reduce_data``,
    and that ``mds`` agrees with projecting the assignments through the
    ``dim_reduce`` result.
    """
    means = np.array([[20., 30., 1.],
                      [10., 3., 8.],
                      [90., 50., 20.],
                      [10., 4., 30.]])
    weights = np.array([[0.1, 0.2, 0.3, 0.4, 0.5, 0.1, 0.8],
                        [0.5, 0.3, 0.2, 0.4, 0.2, 0.2, 0.1],
                        [0.4, 0.5, 0.5, 0.2, 0.3, 0.7, 0.1]])
    data = simulation.generate_state_data(means, weights)
    data = data + 1e-8

    # Three clusters reduced to two dimensions: one row per cluster.
    reduced_means = dim_reduce(means, weights, 2)
    self.assertEqual(reduced_means.shape, (3, 2))

    # Reducing the data directly: one row per column of the data matrix.
    reduced_data = dim_reduce_data(data, 2)
    self.assertEqual(reduced_data.shape, (data.shape[1], 2))

    # mds() should equal the reduced means projected through the weights.
    expected = np.dot(reduced_means.transpose(), weights)
    actual = mds(means, weights, 2)
    self.assertTrue(np.abs(actual - expected).sum() < 1e-6)
def test_state_estimation_2(self):
    """
    Generate sample data from a slightly larger set to see that the
    state estimation is accurate.

    11 cells, 5 genes, 3 clusters

    This might fail due to inherent randomness...
    """
    sim_means = np.array([[20., 30., 4.],
                          [10., 3., 9.],
                          [90., 50., 10.],
                          [10., 4., 30.],
                          [35., 10., 2.]])
    sim_assignments = np.array(
        [[0.1, 0.2, 0.3, 0.4, 0.1, 0.7, 0.6, 0.9, 0.5, 0.2, 0.1],
         [0.6, 0.7, 0.3, 0.4, 0.1, 0.2, 0.1, 0.1, 0.0, 0.3, 0.8],
         [0.3, 0.1, 0.4, 0.2, 0.8, 0.1, 0.3, 0.0, 0.5, 0.5, 0.1]])
    sim_data = simulation.generate_state_data(sim_means, sim_assignments)
    sim_data = sparse.csc_matrix(sim_data)
    print(sim_data)
    # add noise to the mean
    sim_means_noised = sim_means + 5 * (np.random.random(sim_means.shape) - 0.5)
    m, w, ll = state_estimation.poisson_estimate_state(
        sim_data, 3, init_means=sim_means_noised, max_iters=10,
        disp=False, parallel=False)
    print(m)
    print(w)
    print(w.sum(0))

    # Each cell's weights must sum to 1. Use the absolute deviation so
    # that column sums well below 1 fail as well as sums above 1.
    self.assertLess(np.max(np.abs(w.sum(0) - 1.0)), 0.01)

    # test every permutation of clusters:
    # mean error in M is less than 10, mean error in W is less than 0.2
    cluster_orders = list(itertools.permutations([0, 1, 2]))
    means_good = any(
        np.mean(np.abs(sim_means - m[:, p])) < 10.0
        for p in cluster_orders)
    weights_good = any(
        np.mean(np.abs(sim_assignments - w[p, :])) < 0.2
        for p in cluster_orders)
    self.assertTrue(means_good)
    self.assertTrue(weights_good)
def test_run_se(self):
    """
    Test the run_state_estimation function with dist='Poiss'.

    Data are simulated with generate_poisson_states(2, 200, 20); the
    estimated means and weights must be close to the simulated ones
    under some ordering of the two clusters.
    """
    true_means, true_weights = simulation.generate_poisson_states(2, 200, 20)
    counts = simulation.generate_state_data(true_means, true_weights)
    m, w, ll = run_state_estimation(
        counts, 2, dist='Poiss', max_iters=10, disp=False)

    # Cluster labels are arbitrary, so accept a match under either ordering.
    means_ok = False
    weights_ok = False
    for perm in itertools.permutations([0, 1]):
        if not means_ok:
            means_ok = np.mean(np.abs(true_means - m[:, perm])) < 20.0
        if not weights_ok:
            weights_ok = np.mean(np.abs(true_weights - w[perm, :])) < 0.3
    self.assertTrue(means_ok)
    self.assertTrue(weights_ok)
def test_random_means_2(self):
    """
    Test Poisson state estimation with random means and weights,
    initialized from noised true means.

    20 cells, 200 genes, 2 clusters
    """
    # NOTE(review): another `test_random_means_2` (ZIP variant) is visible
    # in this file; if both are in the same class, this later definition
    # replaces the earlier one -- confirm.
    true_means, true_weights = simulation.generate_poisson_states(2, 20, 200)
    counts = simulation.generate_state_data(true_means, true_weights)
    noisy_init = true_means + 5 * (np.random.random(true_means.shape) - 0.5)
    m, w, ll = state_estimation.poisson_estimate_state(
        counts, 2, init_means=noisy_init, max_iters=10, disp=False)

    # Cluster labels are arbitrary, so accept a match under either ordering.
    means_ok = False
    weights_ok = False
    for perm in itertools.permutations([0, 1]):
        if not means_ok:
            means_ok = np.mean(np.abs(true_means - m[:, perm])) < 20.0
        if not weights_ok:
            weights_ok = np.mean(np.abs(true_weights - w[perm, :])) < 0.2
    self.assertTrue(means_ok)
    self.assertTrue(weights_ok)