def test_neighborhood_preserving_embedding(self):
    """Check the 1-vector NPE of a seeded 5x3 point set against stored values."""
    # Seed first so the random point cloud is reproducible.
    np.random.seed(1234)
    points = np.random.random((5, 3))

    knn = neighbor_graph(points, k=3)
    embedding = knn.neighborhood_preserving_embedding(points, num_vecs=1)

    reference = [[-0.433578], [0.761129], [-0.482382]]
    assert_array_almost_equal(reference, embedding)
def test_locally_linear_embedding(self):
    """Compare the graph's LLE against the standalone locally_linear_embedding."""
    np.random.seed(1234)
    points = np.random.random((5, 3))

    # Reference result: only the first element of the return value is compared.
    lle_result = locally_linear_embedding(points, 3, 1)
    reference = lle_result[0]

    # Same neighborhood size (3), with barycenter weights assigned in place.
    knn = neighbor_graph(points, k=3)
    weighted = knn.barycenter_edge_weights(points, copy=False)
    embedding = weighted.locally_linear_embedding(num_dims=1)

    assert_signless_array_almost_equal(reference, embedding)
def main():
    """Run the swiss roll demo: three figures plus an HTML export.

    1. Scatter plot of the raw 3d coordinates, colored by theta.
    2. The symmetrized 5-NN graph drawn over the original coordinates.
    3. The same graph drawn over its 2d Isomap embedding.

    Also writes 'swiss_roll.html' to the current directory and blocks in
    plt.show() until the figures are closed.
    """
    np.random.seed(1234)
    X, theta = swiss_roll(8, 500, return_theta=True)

    print('Figure 1 of 3: bare coordinates in 3d')
    # Create the 3d axes through the figure so they are actually attached to
    # it. Constructing Axes3D(fig) directly no longer adds the axes to the
    # figure on current matplotlib releases, which left this plot empty.
    # NOTE: on old matplotlib the '3d' projection is only registered once
    # mpl_toolkits.mplot3d has been imported; the file-level Axes3D import
    # this function relied on before takes care of that.
    ax = plt.figure().add_subplot(111, projection='3d')
    ax.scatter(*X.T, c=theta)

    print('Figure 2 of 3: 5-NN graph in original coordinates')
    g = neighbor_graph(X, k=5).symmetrize('max')
    g.plot(X, directed=False, weighted=False, fig='new', edge_style='k-',
           vertex_style=dict(c=theta))

    print('Writing swiss_roll.html for force-directed layout demo')
    g.to_html('swiss_roll.html', directed=False, weighted=False,
              vertex_colors=theta)

    print('Figure 3 of 3: 2d Isomap embedding of 5-NN graph')
    emb = g.isomap(num_dims=2)
    _, ax = plt.subplots(figsize=(10, 5))
    g.plot(emb, directed=False, weighted=False, ax=ax, edge_style='k-',
           vertex_style=dict(c=theta))
    # The embedding axes carry no meaningful units, so hide the ticks.
    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])
    plt.show()
def main():
    """Plot four cycle-removal results for a swiss roll kNN graph in a 2x2 grid.

    Panels: minimum spanning subtree, circle tear, cycle cut and isograph,
    each drawn by _plot_diff against the (theta, X[:, 1]) coordinates.
    """
    np.random.seed(1234)
    X, theta = swiss_roll(8, 300, return_theta=True, radius=0.5)
    GT = np.column_stack((theta, X[:, 1]))

    g = neighbor_graph(X, k=6)
    # Rebuild the graph from its dense adjacency matrix.
    g = g.from_adj_matrix(g.matrix('dense'))

    ct = 12
    _, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 8),
                           sharex=True, sharey=True)

    # (axis, graph method, keyword arguments, panel title); each method is
    # invoked right before its panel is drawn, in the listed order.
    panels = (
        (axes[0, 0], g.minimum_spanning_subtree, {}, 'MST'),
        (axes[0, 1], g.circle_tear, {'cycle_len_thresh': ct},
         'Circle Tear (%d)' % ct),
        (axes[1, 0], g.cycle_cut, {'cycle_len_thresh': ct},
         'Cycle Cut (%d)' % ct),
        (axes[1, 1], g.isograph, {}, 'Isograph'),
    )
    for axis, build, kwargs, label in panels:
        _plot_diff(axis, GT, g, build(**kwargs), title=label)

    plt.show()
def test_shortest_path_subtree(self):
    """Check the shortest path subtree rooted at vertex 0, directed then undirected."""
    n = X.shape[0]
    G = neighbor_graph(X, k=4)

    # directed case
    weights = [0.163, 0.199, 0.079, 0.188, 0.173, 0.122, 0.136, 0.136, 0.197]
    rows = [3, 0, 14, 0, 0, 3, 0, 3, 3]
    cols = [1, 3, 5, 7, 10, 13, 14, 18, 19]
    expected = np.zeros((n, n))
    expected[rows, cols] = weights
    spt = G.shortest_path_subtree(0, directed=True)
    assert_array_almost_equal(spt.matrix('dense'), expected, decimal=3)

    # test undirected case
    G.symmetrize(method='max', copy=False)
    # The expected matrix is symmetric: vertex i (1..19) is paired with
    # partners[i - 1], and the same weight appears at both (a, b) and (b, a).
    partners = [10, 8, 0, 6, 0, 1, 0, 5, 6, 0, 0, 6, 3, 0, 17, 8, 1, 3, 3]
    vertices = list(range(1, 20))
    weights = [0.185, 0.379, 0.199, 0.32, 0.205, 0.255, 0.188, 0.508, 0.192,
               0.173, 0.279, 0.258, 0.122, 0.136, 0.316, 0.326, 0.278, 0.136,
               0.197]
    expected = np.zeros((n, n))
    expected[partners, vertices] = weights
    expected[vertices, partners] = weights
    spt = G.shortest_path_subtree(0, directed=False)
    assert_array_almost_equal(spt.matrix('dense'), expected, decimal=3)
def _make_blob_graphs(self, k=11):
    """Build two equivalent graphs over a pair of separated 2d point blobs.

    Twenty random 2d points are drawn; the last ten are shifted by +2 in
    both coordinates and labeled 1, the first ten are labeled 0.

    Returns ((G_sparse, G_dense), labels), where G_sparse is the symmetrized
    k-nearest-neighbor graph and G_dense is rebuilt from its dense matrix.
    """
    points = np.random.random(size=(20, 2))
    points[10:] += 2

    # Ten zeros followed by ten ones, as floats.
    labels = np.repeat([0.0, 1.0], 10)

    knn = neighbor_graph(points, k=k)
    G_sparse = knn.symmetrize()
    dense_adj = G_sparse.matrix(dense=True)
    G_dense = Graph.from_adj_matrix(dense_adj)
    return (G_sparse, G_dense), labels
def _make_blob_graphs(self, k=11):
    """Create a two-blob point set and return its graph in two forms.

    The second half of the 20 random 2d points is offset by +2 and given
    label 1; the first half keeps label 0.

    Returns ((G_sparse, G_dense), labels): the symmetrized kNN graph, a
    graph rebuilt from that graph's dense adjacency matrix, and the labels.
    """
    n_points, half = 20, 10
    blobs = np.random.random(size=(n_points, 2))
    blobs[half:] += 2

    labels = np.zeros(n_points)
    labels[half:] = 1

    G_sparse = neighbor_graph(blobs, k=k).symmetrize()
    adjacency = G_sparse.matrix('dense')
    G_dense = Graph.from_adj_matrix(adjacency)
    return (G_sparse, G_dense), labels
def main():
    """Cluster the scorecard data and show three figures.

    1. k-means labels over the first two principal components.
    2. Spectral clustering labels over a 2d Isomap embedding of a 10-NN graph.
    3. The pairwise cosine distance matrix, reordered by spectral label.

    Graph statistics (component count, diameter, radius) are printed along
    the way. Blocks in plt.show() at the end.
    """
    def hide_ticks(axis):
        # Remove tick marks on both axes of the given plot.
        axis.xaxis.set_ticks([])
        axis.yaxis.set_ticks([])

    x, labels = prepare_data(SCORECARD_FILE)
    print('%d schools w/ %d features each' % x.shape)

    # plot k-means clusters over the first two PCs
    pca = PCA(n_components=2)
    pcs = pca.fit_transform(x)
    kmeans = KMeans(n_clusters=4)
    y_kmeans = kmeans.fit_predict(x)
    clouds_kmeans = word_clouds(y_kmeans, labels)
    ax = plt.subplots(figsize=(14, 6))[1]
    scatter_labeled(pcs, labels, ax=ax, colors=y_kmeans, cmap='Dark2',
                    color_labels=clouds_kmeans, edgecolor='none')
    ax.set_title('Top 2 PCs, k-means labels')
    hide_ticks(ax)

    # build a kNN graph
    print('Building a 10-NN graph, based on cosine distance...')
    dist = pairwise_distances(x, metric='cosine')
    knn = neighbor_graph(dist, precomputed=True, k=10)
    knn = knn.symmetrize(method='max')
    # XXX: hack to sparsify
    knn = knn.from_adj_matrix(knn.matrix(csr=True))
    print(knn.connected_components(return_labels=False), 'connected components')

    # compute some statistics
    apsp = knn.shortest_path(directed=False, unweighted=True)
    eccen = apsp.max(axis=0)
    far_idx = eccen.argmax()
    diameter_info = (eccen.max(), labels[far_idx],
                     labels[apsp[far_idx].argmax()])
    print('diameter = %d: "%s" <-> "%s"' % diameter_info)
    radius_info = (eccen.min(), labels[eccen.argmin()])
    print('radius = %d: "%s"' % radius_info)

    # find a spectral clustering
    print('Computing spectral clustering...')
    y_spectral = knn.cluster_spectral(9)
    # Relabel clusters by decreasing size: the largest cluster becomes 0.
    cluster_sizes = np.bincount(y_spectral)
    size_rank = np.argsort(np.argsort(-cluster_sizes))
    y_spectral = size_rank[y_spectral]
    clouds_spectral = word_clouds(y_spectral, labels)

    # plot the new clustering over a 2d Isomap embedding
    print('Embedding to 2d with Isomap...')
    emb = knn.isomap(num_dims=2, directed=False)
    ax = plt.subplots(figsize=(14, 6))[1]
    vertex_style = {'marker': ',', 'c': 'k', 's': 1, 'zorder': 0}
    knn.plot(emb, ax=ax, directed=False, weighted=False, edge_style='k-',
             vertex_style=vertex_style)
    scatter_labeled(emb, labels, ax=ax, colors=y_spectral, cmap='Set1',
                    color_labels=clouds_spectral, zorder=2, edgecolor='none')
    ax.set_title('Isomap embedding, spectral clustering labels')
    hide_ticks(ax)

    # plot the reordered distance matrix
    order = np.argsort(y_spectral)
    ax = plt.subplots(figsize=(8, 8))[1]
    reordered = dist[order][:, order]
    imshow_labeled(reordered, labels[order], y_spectral, ax=ax)
    ax.set_title('Pairwise cosine distance matrix')
    plt.show()
def test_cycle_cut(self):
    """cycle_cut on the symmetrized 4-NN graph removes the expected edges."""
    knn = neighbor_graph(X, k=4)
    G = knn.symmetrize(method='max', copy=False)

    # hack: the atomic cycle finder chooses a random vertex to start from
    np.random.seed(1234)
    res = G.cycle_cut(cycle_len_thresh=5, directed=False)

    # Nonzero entries of the difference mark the edges that were cut.
    before = G.matrix('dense')
    after = res.matrix('dense')
    cut_rows, cut_cols = np.nonzero(before - after)
    assert_array_equal(cut_rows, [1, 1, 6, 17])
    assert_array_equal(cut_cols, [6, 17, 1, 1])
def main():
    """Interactively pick points, build their k-nearest-neighbor graph, plot it.

    Vertex coordinates come from mouse clicks on an empty plot and k is read
    from standard input.
    """
    print("Select coordinates for graph vertices:")
    plt.plot([])
    # n=-1 / timeout=-1: keep collecting clicks with no count or time limit.
    clicked = plt.ginput(n=-1, timeout=-1)
    coords = np.array(clicked)

    answer = input("Number of nearest neighbors: ")
    k = int(answer)
    g = neighbor_graph(coords, k=k)

    print("Resulting graph:")
    # plot() hands back a callable which is invoked right away
    # (presumably a show() hook -- confirm against Graph.plot).
    show = g.plot(coords, vertex_style='ro')
    show()
def test_cycle_cut(self):
    """Verify which edges cycle_cut drops from the symmetrized 4-NN graph."""
    G = neighbor_graph(X, k=4).symmetrize(method='max', copy=False)

    # hack: the atomic cycle finder chooses a random vertex to start from
    np.random.seed(1234)
    pruned = G.cycle_cut(cycle_len_thresh=5, directed=False)

    removed = G.matrix('dense') - pruned.matrix('dense')
    nonzero_idx = np.nonzero(removed)
    ii, jj = nonzero_idx
    expected_rows = [1, 1, 6, 17]
    expected_cols = [6, 17, 1, 1]
    assert_array_equal(ii, expected_rows)
    assert_array_equal(jj, expected_cols)
def test_connected_subgraphs(self):
    """connected_subgraphs splits graphs into the expected components."""
    # Undirected, unordered components of the edge-pair graph.
    pair_graph = Graph.from_edge_pairs(PAIRS)
    parts = list(pair_graph.connected_subgraphs(directed=False, ordered=False))
    self.assertEqual(len(parts), 2)
    first, second = parts[0], parts[1]
    assert_array_equal(first.pairs(), PAIRS[:6])
    assert_array_equal(second.pairs(), [[0, 1], [1, 0]])

    # Directed, ordered components of a 2-NN graph.
    knn = neighbor_graph(X, k=2)
    parts = list(knn.connected_subgraphs(directed=True, ordered=True))
    self.assertEqual(len(parts), 3)
    sizes = [part.num_vertices() for part in parts]
    self.assertEqual(sizes, [9, 6, 5])
def test_connected_subgraphs(self):
    """Check component count, edge pairs and sizes from connected_subgraphs."""
    G = Graph.from_edge_pairs(PAIRS)
    components = G.connected_subgraphs(directed=False, ordered=False)
    subgraphs = list(components)
    self.assertEqual(len(subgraphs), 2)
    expected_pairs = (PAIRS[:6], [[0, 1], [1, 0]])
    assert_array_equal(subgraphs[0].pairs(), expected_pairs[0])
    assert_array_equal(subgraphs[1].pairs(), expected_pairs[1])

    G = neighbor_graph(X, k=2)
    components = G.connected_subgraphs(directed=True, ordered=True)
    subgraphs = list(components)
    self.assertEqual(len(subgraphs), 3)
    vertex_counts = [sub.num_vertices() for sub in subgraphs]
    self.assertEqual(vertex_counts, [9, 6, 5])
def test_neighborhood_subgraph(self):
    """neighborhood_subgraph returns the expected vertices and edge counts."""
    G = neighbor_graph(X, k=4)

    # simple 1-neighbor subgraph
    sub, mask = G.neighborhood_subgraph(0, radius=1, weighted=False,
                                        return_mask=True)
    selected = mask.nonzero()[0]
    assert_array_equal(selected, [0, 3, 7, 10, 14])
    self.assertEqual(sub.num_vertices(), 5)
    self.assertEqual(sub.num_edges(), 13)

    # distance-based subgraph
    sub, mask = G.neighborhood_subgraph(12, radius=0.5, return_mask=True)
    selected = mask.nonzero()[0]
    assert_array_equal(selected, [2, 4, 6, 9, 12, 15, 17])
    self.assertEqual(sub.num_vertices(), 7)
    self.assertEqual(sub.num_edges(), 23)
def test_circle_tear(self):
    """circle_tear removes the expected edges for MST and SPT starting trees."""
    knn = neighbor_graph(X, k=4)
    G = knn.symmetrize(method='max', copy=False)

    # test MST start
    torn = G.circle_tear(spanning_tree='mst', cycle_len_thresh=5)
    removed = G.matrix('dense') - torn.matrix('dense')
    rows, cols = np.nonzero(removed)
    assert_array_equal(rows, [5, 8, 8, 11])
    assert_array_equal(cols, [8, 5, 11, 8])

    # test SPT start with a fixed starting vertex
    torn = G.circle_tear(spanning_tree='spt', cycle_len_thresh=5, spt_idx=8)
    removed = G.matrix('dense') - torn.matrix('dense')
    rows, cols = np.nonzero(removed)
    assert_array_equal(rows, [1, 1, 6, 17])
    assert_array_equal(cols, [6, 17, 1, 1])
def test_minimum_spanning_subtree(self):
    """The MST of the 4-NN graph matches stored edge weights to 3 decimals."""
    n = X.shape[0]
    G = neighbor_graph(X, k=4)

    # Expected nonzero entries: weights[i] sits at (rows[i], cols[i]).
    weights = [
        0.279, 0.136, 0.255, 0.041, 0.124, 0.186, 0.131, 0.122, 0.136, 0.185,
        0.226, 0.061, 0.255, 0.022, 0.061, 0.054, 0.053, 0.326, 0.185, 0.191,
        0.054, 0.177, 0.279, 0.226, 0.224, 0.041, 0.122, 0.177, 0.136, 0.053,
        0.186, 0.224, 0.131, 0.326, 0.022, 0.191, 0.136, 0.124,
    ]
    rows = [
        0, 0, 1, 1, 1, 2, 2, 3, 3, 4,
        4, 5, 6, 6, 7, 7, 7, 8, 9, 9,
        10, 10, 11, 12, 12, 13, 13, 13, 14, 14,
        15, 15, 16, 16, 17, 17, 18, 19,
    ]
    cols = [
        11, 14, 6, 13, 19, 15, 16, 13, 18, 9,
        12, 7, 1, 17, 5, 10, 14, 16, 4, 17,
        7, 13, 0, 4, 15, 1, 3, 10, 0, 7,
        2, 12, 2, 8, 6, 9, 3, 1,
    ]
    expected = np.zeros((n, n))
    expected[rows, cols] = weights

    tree = G.minimum_spanning_subtree()
    assert_array_almost_equal(tree.matrix('dense'), expected, decimal=3)
def test_circle_tear(self):
    """Verify the edges dropped by circle_tear for both spanning tree modes."""
    G = neighbor_graph(X, k=4).symmetrize(method='max', copy=False)

    # test MST start
    res = G.circle_tear(spanning_tree='mst', cycle_len_thresh=5)
    original, reduced = G.matrix('dense'), res.matrix('dense')
    ii, jj = np.nonzero(original - reduced)
    want_rows, want_cols = [5, 8, 8, 11], [8, 5, 11, 8]
    assert_array_equal(ii, want_rows)
    assert_array_equal(jj, want_cols)

    # test SPT start with a fixed starting vertex
    res = G.circle_tear(spanning_tree='spt', cycle_len_thresh=5, spt_idx=8)
    original, reduced = G.matrix('dense'), res.matrix('dense')
    ii, jj = np.nonzero(original - reduced)
    want_rows, want_cols = [1, 1, 6, 17], [6, 17, 1, 1]
    assert_array_equal(ii, want_rows)
    assert_array_equal(jj, want_cols)
def test_isograph(self):
    """isograph returns a new graph, cutting one edge pair by default."""
    # make roughly U-shaped data
    angles = np.linspace(0, 2 * np.pi, 10)[1:]
    xs = np.sin(angles) * 2
    ys = np.cos(angles)
    data = np.column_stack((xs, ys))
    G = neighbor_graph(data, k=2)

    iso = G.isograph()
    self.assertIsNot(iso, G)
    changed = G.matrix('dense') - iso.matrix('dense')
    rows, cols = np.nonzero(changed)
    assert_array_equal(rows, [3, 4])
    assert_array_equal(cols, [4, 3])

    # test case with large epsilon
    iso = G.isograph(min_weight=999)
    self.assertIsNot(iso, G)
    assert_array_equal(iso.matrix('dense'), G.matrix('dense'))
def test_neighborhood_subgraph(self):
    """Check vertex masks and sizes of hop-based and distance-based subgraphs."""
    G = neighbor_graph(X, k=4)

    # simple 1-neighbor subgraph
    result = G.neighborhood_subgraph(0, radius=1, weighted=False,
                                     return_mask=True)
    g, mask = result
    kept_vertices = mask.nonzero()[0]
    assert_array_equal(kept_vertices, [0, 3, 7, 10, 14])
    self.assertEqual(g.num_vertices(), 5)
    self.assertEqual(g.num_edges(), 13)

    # distance-based subgraph
    result = G.neighborhood_subgraph(12, radius=0.5, return_mask=True)
    g, mask = result
    kept_vertices = mask.nonzero()[0]
    assert_array_equal(kept_vertices, [2, 4, 6, 9, 12, 15, 17])
    self.assertEqual(g.num_vertices(), 7)
    self.assertEqual(g.num_edges(), 23)
def main():
    """Compare four cycle-removal methods on a swiss roll kNN graph.

    Draws a 2x2 grid (MST, circle tear, cycle cut, isograph) via _plot_diff,
    using (theta, X[:, 1]) as the plotting coordinates, then shows it.
    """
    np.random.seed(1234)
    X, theta = swiss_roll(8, 300, return_theta=True, radius=0.5)
    GT = np.column_stack((theta, X[:, 1]))

    knn = neighbor_graph(X, k=6)
    # Re-create the graph from its dense adjacency matrix.
    g = knn.from_adj_matrix(knn.matrix('dense'))

    ct = 12
    fig_and_axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 8),
                                sharex=True, sharey=True)
    axes = fig_and_axes[1]

    # Each entry defers its graph computation until its panel is drawn, so
    # the methods run one at a time in the listed order.
    panels = [
        (axes[0, 0], 'MST',
         lambda: g.minimum_spanning_subtree()),
        (axes[0, 1], 'Circle Tear (%d)' % ct,
         lambda: g.circle_tear(cycle_len_thresh=ct)),
        (axes[1, 0], 'Cycle Cut (%d)' % ct,
         lambda: g.cycle_cut(cycle_len_thresh=ct)),
        (axes[1, 1], 'Isograph',
         lambda: g.isograph()),
    ]
    for axis, label, compute in panels:
        _plot_diff(axis, GT, g, compute(), title=label)

    plt.show()
def test_isograph(self):
    """Check isograph's default edge removal and its min_weight=999 no-op."""
    # make roughly U-shaped data
    theta = np.linspace(0, 2 * np.pi, 10)[1:]
    coords = (np.sin(theta) * 2, np.cos(theta))
    G = neighbor_graph(np.column_stack(coords), k=2)

    trimmed = G.isograph()
    self.assertIsNot(trimmed, G)
    full_adj = G.matrix('dense')
    trimmed_adj = trimmed.matrix('dense')
    ii, jj = np.nonzero(full_adj - trimmed_adj)
    assert_array_equal(ii, [3, 4])
    assert_array_equal(jj, [4, 3])

    # test case with large epsilon
    untouched = G.isograph(min_weight=999)
    self.assertIsNot(untouched, G)
    assert_array_equal(untouched.matrix('dense'), G.matrix('dense'))
def test_regression(self):
    """Exercise Graph.regression on points sampled along a circular arc.

    Every other value of t is supplied as a known target and the result is
    compared against the full t. Covers slice and boolean masks, the
    smoothness penalty, kernel='none' on a graph rebuilt from a dense
    matrix, two-column targets, and the error raised for a badly shaped y.
    """
    t = np.linspace(0, 1, 31)
    pts = np.column_stack((np.sin(t), np.cos(t)))
    G = neighbor_graph(pts, k=3).symmetrize()
    y_mask = slice(None, None, 2)

    # test the interpolated case
    x = G.regression(t[y_mask], y_mask)
    assert_array_equal(t, np.linspace(0, 1, 31))  # ensure t hasn't changed
    self.assertLess(np.linalg.norm(t - x), 0.15)

    # test the boolean mask case
    y_mask = np.zeros_like(t, dtype=bool)
    y_mask[::2] = True
    x = G.regression(t[y_mask], y_mask)
    self.assertLess(np.linalg.norm(t - x), 0.15)

    # test the penalized case
    x = G.regression(t[y_mask], y_mask, smoothness_penalty=1e-4)
    self.assertLess(np.linalg.norm(t - x), 0.15)

    # test no kernel + dense laplacian case
    dG = Graph.from_adj_matrix(G.matrix('dense'))
    x = dG.regression(t[y_mask], y_mask, kernel='none')
    self.assertLess(np.linalg.norm(t - x), 0.25)
    x = dG.regression(t[y_mask], y_mask, smoothness_penalty=1e-4,
                      kernel='none')
    self.assertLess(np.linalg.norm(t - x), 0.25)

    # test the multidimensional regression case
    tt = np.column_stack((t, t[::-1]))
    x = G.regression(tt[y_mask], y_mask)
    self.assertLess(np.linalg.norm(tt - x), 0.2)

    # check for bad inputs
    # assertRaisesRegexp was removed in Python 3.12; use the current name
    # when it exists and fall back to the old alias on Python 2.7.
    assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
    if assert_raises_regex is None:
        assert_raises_regex = self.assertRaisesRegexp
    with assert_raises_regex(ValueError, r'^Invalid shape of y array'):
        G.regression([], y_mask)
def test_minimum_spanning_subtree(self):
    """Compare minimum_spanning_subtree of the 4-NN graph to stored entries."""
    G = neighbor_graph(X, k=4)
    num_vertices = X.shape[0]

    # Stored nonzero entries of the expected dense matrix, as parallel lists.
    e_row = [0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 7, 7, 8, 9, 9,
             10, 10, 11, 12, 12, 13, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
             18, 19]
    e_col = [11, 14, 6, 13, 19, 15, 16, 13, 18, 9, 12, 7, 1, 17, 5, 10, 14,
             16, 4, 17, 7, 13, 0, 4, 15, 1, 3, 10, 0, 7, 2, 12, 2, 8, 6, 9,
             3, 1]
    e_data = [0.279, 0.136, 0.255, 0.041, 0.124, 0.186, 0.131, 0.122, 0.136,
              0.185, 0.226, 0.061, 0.255, 0.022, 0.061, 0.054, 0.053, 0.326,
              0.185, 0.191, 0.054, 0.177, 0.279, 0.226, 0.224, 0.041, 0.122,
              0.177, 0.136, 0.053, 0.186, 0.224, 0.131, 0.326, 0.022, 0.191,
              0.136, 0.124]

    expected = np.zeros((num_vertices, num_vertices))
    expected[(e_row, e_col)] = e_data

    mst_dense = G.minimum_spanning_subtree().matrix('dense')
    assert_array_almost_equal(mst_dense, expected, decimal=3)
def test_shortest_path_subtree(self):
    """shortest_path_subtree from vertex 0 matches stored directed/undirected results."""
    n = X.shape[0]
    G = neighbor_graph(X, k=4)

    # directed case
    e_row = [3, 0, 14, 0, 0, 3, 0, 3, 3]
    e_col = [1, 3, 5, 7, 10, 13, 14, 18, 19]
    e_data = [0.163, 0.199, 0.079, 0.188, 0.173, 0.122, 0.136, 0.136, 0.197]
    expected = np.zeros((n, n))
    expected[e_row, e_col] = e_data
    tree = G.shortest_path_subtree(0, directed=True)
    assert_array_almost_equal(tree.matrix('dense'), expected, decimal=3)

    # test undirected case
    G.symmetrize(method='max', copy=False)
    # The expected entries are symmetric, so the index and weight lists are
    # each one 19-element half followed by its mirror.
    half_rows = [10, 8, 0, 6, 0, 1, 0, 5, 6, 0, 0, 6, 3, 0, 17, 8, 1, 3, 3]
    half_cols = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
                 18, 19]
    half_data = [0.185, 0.379, 0.199, 0.32, 0.205, 0.255, 0.188, 0.508,
                 0.192, 0.173, 0.279, 0.258, 0.122, 0.136, 0.316, 0.326,
                 0.278, 0.136, 0.197]
    e_row = half_rows + half_cols
    e_col = half_cols + half_rows
    e_data = half_data * 2
    expected[:] = 0
    expected[e_row, e_col] = e_data
    tree = G.shortest_path_subtree(0, directed=False)
    assert_array_almost_equal(tree.matrix('dense'), expected, decimal=3)
def test_regression(self):
    """Exercise Graph.regression on points sampled along a circular arc.

    Every other value of t is supplied as a known target and the result is
    compared against the full t. Covers slice and boolean masks, the
    smoothness penalty, kernel='none' on a graph rebuilt from a dense
    matrix, two-column targets, and the error raised for a badly shaped y.
    """
    t = np.linspace(0, 1, 31)
    pts = np.column_stack((np.sin(t), np.cos(t)))
    G = neighbor_graph(pts, k=3).symmetrize()
    y_mask = slice(None, None, 2)

    # test the interpolated case
    x = G.regression(t[y_mask], y_mask)
    assert_array_equal(t, np.linspace(0, 1, 31))  # ensure t hasn't changed
    self.assertLess(np.linalg.norm(t - x), 0.15)

    # test the boolean mask case
    y_mask = np.zeros_like(t, dtype=bool)
    y_mask[::2] = True
    x = G.regression(t[y_mask], y_mask)
    self.assertLess(np.linalg.norm(t - x), 0.15)

    # test the penalized case
    x = G.regression(t[y_mask], y_mask, smoothness_penalty=1e-4)
    self.assertLess(np.linalg.norm(t - x), 0.15)

    # test no kernel + dense laplacian case
    dG = Graph.from_adj_matrix(G.matrix(dense=True))
    x = dG.regression(t[y_mask], y_mask, kernel='none')
    self.assertLess(np.linalg.norm(t - x), 0.25)
    x = dG.regression(t[y_mask], y_mask, smoothness_penalty=1e-4,
                      kernel='none')
    self.assertLess(np.linalg.norm(t - x), 0.25)

    # test the multidimensional regression case
    tt = np.column_stack((t, t[::-1]))
    x = G.regression(tt[y_mask], y_mask)
    self.assertLess(np.linalg.norm(tt - x), 0.2)

    # check for bad inputs
    # assertRaisesRegexp was removed in Python 3.12; use the current name
    # when it exists and fall back to the old alias on Python 2.7.
    assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
    if assert_raises_regex is None:
        assert_raises_regex = self.assertRaisesRegexp
    with assert_raises_regex(ValueError, r'^Invalid shape of y array'):
        G.regression([], y_mask)
def ngraph(*a, **k):
    """Forward all arguments to neighbor_graph and return matrix(dense=True)."""
    graph = neighbor_graph(*a, **k)
    return graph.matrix(dense=True)
def ngraph(*a, **k):
    """Build a neighbor graph from the given arguments; return its 'dense' matrix."""
    built = neighbor_graph(*a, **k)
    dense = built.matrix('dense')
    return dense
def ngraph(*a, **k):
    """Shorthand: call neighbor_graph(*a, **k) and return its matrix('dense')."""
    g = neighbor_graph(*a, **k)
    return g.matrix('dense')
def time_neighbor_graph(self, epsilon, k, weighting):
    """Timed body: build a neighbor graph from self.X with the given settings.

    The result is discarded; only the call itself matters here.
    """
    # NOTE(review): `gc` is whatever this module bound to that name; it must
    # expose neighbor_graph, so it is presumably not the stdlib gc module.
    settings = dict(k=k, epsilon=epsilon, weighting=weighting)
    gc.neighbor_graph(self.X, **settings)
def time_neighbor_graph_precomputed(self, epsilon, k, weighting):
    """Timed body: build a neighbor graph from self.D with precomputed=True.

    The result is discarded; only the call itself matters here.
    """
    # NOTE(review): `gc` is whatever this module bound to that name; it must
    # expose neighbor_graph, so it is presumably not the stdlib gc module.
    settings = dict(k=k, epsilon=epsilon, weighting=weighting,
                    precomputed=True)
    gc.neighbor_graph(self.D, **settings)