def average_error_rate(num_trials=100):
    """Box-plot graph-construction scores over repeated random trials.

    Every trial draws a fresh data set with ``make_test_data(verify=True)``,
    builds four graphs from it (5-nearest-neighbour, epsilon-ball with
    ``epsilon=1.0``, b-matching with ``b=5`` and the manifold spanning graph)
    and scores each one with ``error_ratio(..., return_tuple=True)``.

    The first entry of every score pair is box-plotted into
    ``average_error.png`` and the second into ``average_edges.png``.
    NOTE(review): going by the file names these are an error measure and an
    edge count -- confirm against ``error_ratio``.

    Args:
        num_trials: number of independent data sets to evaluate.
    """
    method_labels = ('$k$-nearest', '$\\epsilon$-close', '$b$-matching', 'MSG')
    # scores[trial, method] holds the pair returned by error_ratio.
    scores = np.empty((num_trials, len(method_labels), 2))

    for trial in xrange(num_trials):
        X, GT = make_test_data(verify=True)
        D = pairwise_distances(X, metric='sqeuclidean')
        # Zero-argument builders in the same order as method_labels; each
        # graph is built and scored before the next one is constructed.
        builders = (
            lambda: neighbor_graph(D, precomputed=True, k=5, symmetrize=True),
            lambda: neighbor_graph(D, precomputed=True, epsilon=1.0),
            lambda: hacky_b_matching(D, 5),
            lambda: manifold_spanning_graph(X, 2),
        )
        for method, build in enumerate(builders):
            scores[trial, method] = error_ratio(build(), GT, return_tuple=True)

    def boxplot_per_method(columns):
        # One box per method (column) on a fresh 5x6 figure.
        pyplot.figure(figsize=(5, 6))
        axis = pyplot.gca()
        axis.boxplot(columns, widths=0.75)
        axis.set_xticklabels(method_labels, fontsize=12)

    boxplot_per_method(scores[:, :, 0])
    # Lower the bottom of the y-axis by one unit before saving.
    bottom, top = pyplot.ylim()
    pyplot.ylim((bottom - 1, top))
    savefig('average_error.png')

    boxplot_per_method(scores[:, :, 1])
    savefig('average_edges.png')
def average_error_rate(num_trials=100):
    """Box-plot graph-construction scores over repeated random trials.

    Every trial draws a fresh data set with ``make_test_data(verify=True)``,
    builds four graphs from it (5-nearest-neighbour, epsilon-ball with
    ``epsilon=1.0``, b-matching with ``b=5`` and the manifold spanning graph)
    and scores each one with ``error_ratio(..., return_tuple=True)``.

    The first entry of every score pair is box-plotted into
    ``average_error.png`` and the second into ``average_edges.png``.
    NOTE(review): going by the file names these are an error measure and an
    edge count -- confirm against ``error_ratio``.

    Args:
        num_trials: number of independent data sets to evaluate.
    """
    method_labels = ('$k$-nearest', '$\\epsilon$-close', '$b$-matching', 'MSG')
    # scores[trial, method] holds the pair returned by error_ratio.
    scores = np.empty((num_trials, len(method_labels), 2))

    for trial in xrange(num_trials):
        X, GT = make_test_data(verify=True)
        D = pairwise_distances(X, metric='sqeuclidean')
        # Zero-argument builders in the same order as method_labels; each
        # graph is built and scored before the next one is constructed.
        builders = (
            lambda: neighbor_graph(D, precomputed=True, k=5, symmetrize=True),
            lambda: neighbor_graph(D, precomputed=True, epsilon=1.0),
            lambda: hacky_b_matching(D, 5),
            lambda: manifold_spanning_graph(X, 2),
        )
        for method, build in enumerate(builders):
            scores[trial, method] = error_ratio(build(), GT, return_tuple=True)

    def boxplot_per_method(columns):
        # One box per method (column) on a fresh 5x6 figure.
        pyplot.figure(figsize=(5, 6))
        axis = pyplot.gca()
        axis.boxplot(columns, widths=0.75)
        axis.set_xticklabels(method_labels, fontsize=12)

    boxplot_per_method(scores[:, :, 0])
    # Lower the bottom of the y-axis by one unit before saving.
    bottom, top = pyplot.ylim()
    pyplot.ylim((bottom - 1, top))
    savefig('average_error.png')

    boxplot_per_method(scores[:, :, 1])
    savefig('average_edges.png')
def swiss_roll_experiment():
    """Run the swiss-roll comparison of graph-construction methods.

    Draws one verified test set, plots it with ``plot_canonical_roll`` and
    runs ``evaluate_sensitivity`` on it.  Then, for kNN, epsilon-ball and
    b-matching, it takes the first parameter in a fixed search range whose
    graph has exactly one connected component.  Each selected graph, plus
    the manifold spanning graph, has its ``error_ratio`` printed and is
    drawn with ``plot_roll`` into ``swiss_<method>_result.png``.

    Raises:
        AssertionError: if a search range contains no parameter that yields
            a single connected component.
    """
    embed_dim = 2
    X, GT = make_test_data(verify=True)
    plot_canonical_roll(X, GT)
    evaluate_sensitivity(X, GT)
    D = pairwise_distances(X, metric='sqeuclidean')

    def is_connected(graph):
        # True when the graph, treated as undirected, is one component.
        return connected_components(graph, directed=False,
                                    return_labels=False) == 1

    # The search failures below are raised explicitly instead of using
    # `assert False`: asserts are removed under `python -O`, which would let
    # a disconnected graph be reported as if the search had succeeded.

    # kNN
    for k in xrange(3, 10):
        Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
        if is_connected(Wknn):
            break
    else:
        raise AssertionError('k too low')
    # Single pre-formatted argument: prints the same text whether `print`
    # is a statement or a function.
    print('k: %s error: %s' % (k, error_ratio(Wknn, GT)))
    plot_roll(Wknn, X, GT[:, 0], embed_dim, 'swiss_knn_result.png')

    # eball
    for eps in np.linspace(0.4, 1.2, 50):
        Weps = neighbor_graph(D, precomputed=True, epsilon=eps,
                              symmetrize=False)
        if is_connected(Weps):
            break
    else:
        raise AssertionError('eps too low')
    print('eps: %s error: %s' % (eps, error_ratio(Weps, GT)))
    plot_roll(Weps, X, GT[:, 0], embed_dim, 'swiss_eps_result.png')

    # b-matching
    for b in xrange(3, 10):
        Wbma = hacky_b_matching(D, b)
        if is_connected(Wbma):
            break
    else:
        raise AssertionError('b too low')
    print('b: %s error: %s' % (b, error_ratio(Wbma, GT)))
    plot_roll(Wbma, X, GT[:, 0], embed_dim, 'swiss_bma_result.png')

    # MSG
    Wmsg = manifold_spanning_graph(X, embed_dim)
    print('MSG error: %s' % (error_ratio(Wmsg, GT),))
    plot_roll(Wmsg, X, GT[:, 0], embed_dim, 'swiss_msg_result.png')
def compute_Ws(X, num_ccs):
    """Build, time and save the distance matrix and three graphs for ``X``.

    Steps, each wrapped in a ``Timer`` and followed by ``np.save``:

    1. squared-Euclidean pairwise distances      -> ``mnist_D.npy``
    2. symmetrized kNN graph, using the smallest ``k`` in 1..9 whose graph
       has at most ``num_ccs`` connected components -> ``mnist_Wknn.npy``
    3. b-matching graph with ``b`` set to that same ``k``
                                                  -> ``mnist_Wbma.npy``
    4. manifold spanning graph (embedding dimension 2)
                                                  -> ``mnist_Wmsg.npy``

    Args:
        X: data passed to ``pairwise_distances`` and
            ``manifold_spanning_graph``.
        num_ccs: largest acceptable number of connected components.

    Returns:
        Tuple ``(D, Wknn, Wbma, Wmsg)``.

    Raises:
        AssertionError: if no ``k`` in 1..9 satisfies the component limit.
    """
    with Timer('Calculating pairwise distances...'):
        D = pairwise_distances(X, metric='sqeuclidean')
    np.save('mnist_D.npy', D)

    # k-nn
    with Timer('Calculating knn graph...'):
        for k in xrange(1, 10):
            Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
            n = connected_components(Wknn, directed=False,
                                     return_labels=False)
            if n <= num_ccs:
                break
        else:
            # Raised explicitly: `assert False` vanishes under `python -O`,
            # which would save and return a graph that misses the limit.
            raise AssertionError('k too low')
    np.save('mnist_Wknn.npy', Wknn)
    print('knn (k=%d)' % k)

    # b-matching
    with Timer('Calculating b-matching graph...'):
        # using 8 decimal places kills the disk
        Wbma = hacky_b_matching(D, k, fmt='%.1f')
    np.save('mnist_Wbma.npy', Wbma)

    # msg
    with Timer('Calculating MSG graph...'):
        Wmsg = manifold_spanning_graph(X, 2, num_ccs=num_ccs)
    np.save('mnist_Wmsg.npy', Wmsg)

    return D, Wknn, Wbma, Wmsg
def compute_Ws(X, num_ccs):
    """Build, time and save the distance matrix and three graphs for ``X``.

    Steps, each wrapped in a ``Timer`` and followed by ``np.save``:

    1. squared-Euclidean pairwise distances      -> ``mnist_D.npy``
    2. symmetrized kNN graph, using the smallest ``k`` in 1..9 whose graph
       has at most ``num_ccs`` connected components -> ``mnist_Wknn.npy``
    3. b-matching graph with ``b`` set to that same ``k``
                                                  -> ``mnist_Wbma.npy``
    4. manifold spanning graph (embedding dimension 2)
                                                  -> ``mnist_Wmsg.npy``

    Args:
        X: data passed to ``pairwise_distances`` and
            ``manifold_spanning_graph``.
        num_ccs: largest acceptable number of connected components.

    Returns:
        Tuple ``(D, Wknn, Wbma, Wmsg)``.

    Raises:
        AssertionError: if no ``k`` in 1..9 satisfies the component limit.
    """
    with Timer('Calculating pairwise distances...'):
        D = pairwise_distances(X, metric='sqeuclidean')
    np.save('mnist_D.npy', D)

    # k-nn
    with Timer('Calculating knn graph...'):
        for k in xrange(1, 10):
            Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
            n = connected_components(Wknn, directed=False,
                                     return_labels=False)
            if n <= num_ccs:
                break
        else:
            # Raised explicitly: `assert False` vanishes under `python -O`,
            # which would save and return a graph that misses the limit.
            raise AssertionError('k too low')
    np.save('mnist_Wknn.npy', Wknn)
    print('knn (k=%d)' % k)

    # b-matching
    with Timer('Calculating b-matching graph...'):
        # using 8 decimal places kills the disk
        Wbma = hacky_b_matching(D, k, fmt='%.1f')
    np.save('mnist_Wbma.npy', Wbma)

    # msg
    with Timer('Calculating MSG graph...'):
        Wmsg = manifold_spanning_graph(X, 2, num_ccs=num_ccs)
    np.save('mnist_Wmsg.npy', Wmsg)

    return D, Wknn, Wbma, Wmsg
def swiss_roll_experiment():
    """Run the swiss-roll comparison of graph-construction methods.

    Draws one verified test set, plots it with ``plot_canonical_roll`` and
    runs ``evaluate_sensitivity`` on it.  Then, for kNN, epsilon-ball and
    b-matching, it takes the first parameter in a fixed search range whose
    graph has exactly one connected component.  Each selected graph, plus
    the manifold spanning graph, has its ``error_ratio`` printed and is
    drawn with ``plot_roll`` into ``swiss_<method>_result.png``.

    Raises:
        AssertionError: if a search range contains no parameter that yields
            a single connected component.
    """
    embed_dim = 2
    X, GT = make_test_data(verify=True)
    plot_canonical_roll(X, GT)
    evaluate_sensitivity(X, GT)
    D = pairwise_distances(X, metric='sqeuclidean')

    def is_connected(graph):
        # True when the graph, treated as undirected, is one component.
        return connected_components(graph, directed=False,
                                    return_labels=False) == 1

    # The search failures below are raised explicitly instead of using
    # `assert False`: asserts are removed under `python -O`, which would let
    # a disconnected graph be reported as if the search had succeeded.

    # kNN
    for k in xrange(3, 10):
        Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
        if is_connected(Wknn):
            break
    else:
        raise AssertionError('k too low')
    # Single pre-formatted argument: prints the same text whether `print`
    # is a statement or a function.
    print('k: %s error: %s' % (k, error_ratio(Wknn, GT)))
    plot_roll(Wknn, X, GT[:, 0], embed_dim, 'swiss_knn_result.png')

    # eball
    for eps in np.linspace(0.4, 1.2, 50):
        Weps = neighbor_graph(D, precomputed=True, epsilon=eps,
                              symmetrize=False)
        if is_connected(Weps):
            break
    else:
        raise AssertionError('eps too low')
    print('eps: %s error: %s' % (eps, error_ratio(Weps, GT)))
    plot_roll(Weps, X, GT[:, 0], embed_dim, 'swiss_eps_result.png')

    # b-matching
    for b in xrange(3, 10):
        Wbma = hacky_b_matching(D, b)
        if is_connected(Wbma):
            break
    else:
        raise AssertionError('b too low')
    print('b: %s error: %s' % (b, error_ratio(Wbma, GT)))
    plot_roll(Wbma, X, GT[:, 0], embed_dim, 'swiss_bma_result.png')

    # MSG
    Wmsg = manifold_spanning_graph(X, embed_dim)
    print('MSG error: %s' % (error_ratio(Wmsg, GT),))
    plot_roll(Wmsg, X, GT[:, 0], embed_dim, 'swiss_msg_result.png')
def lapeig_linear(X=None, W=None, L=None, num_vecs=None, k=None, eball=None):
    """Run ``lapeig`` on a Laplacian projected through the data matrix.

    With ``X^T X = U S V^T`` and ``F = U sqrt(S)``, this forms
    ``T = F^+ X^T L X (F^+)^T`` (``F^+`` being the pseudo-inverse of ``F``),
    symmetrizes it as ``(T + T^T) / 2`` and passes the result to ``lapeig``.

    Args:
        X: data matrix.  Required in practice despite the ``None`` default,
            because ``X.T`` is used unconditionally.
        W: optional adjacency; only consulted when ``L`` is None.  When both
            ``W`` and ``L`` are None it is built with
            ``neighbor_graph(X, k=k, epsilon=eball)``.
        L: optional precomputed Laplacian; otherwise ``laplacian(W)``.
        num_vecs: forwarded to ``lapeig``.
        k, eball: neighbor-graph parameters, used only when the graph has to
            be built here.

    Returns:
        Whatever ``lapeig(L=..., num_vecs=num_vecs)`` returns for the
        symmetrized matrix.
    """
    if L is None:
        graph = neighbor_graph(X, k=k, epsilon=eball) if W is None else W
        L = laplacian(graph)

    left_vecs, sing_vals, _ = np.linalg.svd(np.dot(X.T, X))
    Fplus = np.linalg.pinv(np.dot(left_vecs, np.diag(np.sqrt(sing_vals))))

    # Left-to-right matrix product: Fplus . X^T . L . X . Fplus^T
    T = Fplus
    for factor in (X.T, L, X, Fplus.T):
        T = np.dot(T, factor)

    return lapeig(L=0.5 * (T + T.T), num_vecs=num_vecs)
def evaluate_sensitivity(X, GT):
    """Plot how graph scores react to the kNN and epsilon-ball parameters.

    ``eval_method`` is run over k = 1..6 (``neighbor_graph`` with
    ``symmetrize=False``) and over 50 evenly spaced epsilon values in
    [0.2, 1.2].  The 2x2 figure saved as ``sensitivity.png`` shows
    ``errors * 100`` in the top row and ``edges`` in the bottom row, with kNN
    in the left column and epsilon-ball in the right.  Each panel gets a
    dashed vertical line at the first parameter whose ``conn`` entry is at
    most 1.

    NOTE(review): that lookup assumes ``conn`` is non-increasing along the
    parameter range (presumably a connected-component count) -- confirm
    against ``eval_method``.

    Args:
        X: data points handed to ``neighbor_graph``.
        GT: ground truth forwarded to ``eval_method``.
    """
    def first_connected(values, conn):
        # Reversed, conn is searched as an ascending sequence; the number of
        # entries <= 1 tells how far from the end the first such entry sits.
        n_at_most_one = np.searchsorted(conn[::-1], 1, side='right')
        return values[len(conn) - n_at_most_one]

    def draw_column(err_ax, edge_ax, values, errors, edges, mark, y_floor):
        err_ax.plot(values, errors * 100, 'k+-')
        err_ax.axvline(mark, color='k', linestyle='--')
        # Pin the lower y-limit while keeping the automatic upper one.
        err_ax.set_ylim((y_floor, err_ax.get_ylim()[1]))
        edge_ax.plot(values, edges, 'k+-')
        edge_ax.axvline(mark, color='k', linestyle='--')

    knn_values = np.arange(1, 7)
    eps_values = np.linspace(0.2, 1.2, 50)

    # --- kNN column -------------------------------------------------------
    knn_errors, knn_edges, knn_conn = eval_method(
        knn_values,
        lambda k: neighbor_graph(X, k=k, symmetrize=False),
        GT)
    knn_mark = first_connected(knn_values, knn_conn)

    fig, axes = pyplot.subplots(nrows=2, ncols=2)
    (knn_err_ax, eps_err_ax), (knn_edge_ax, eps_edge_ax) = axes

    knn_err_ax.set_ylabel('Edge error %', fontsize=14)
    knn_edge_ax.set_xlabel('$k$', fontsize=16)
    knn_edge_ax.set_ylabel('Total edges', fontsize=14)
    draw_column(knn_err_ax, knn_edge_ax, knn_values,
                knn_errors, knn_edges, knn_mark, -0.05)

    # --- epsilon-ball column ----------------------------------------------
    eps_errors, eps_edges, eps_conn = eval_method(
        eps_values,
        lambda eps: neighbor_graph(X, epsilon=eps),
        GT)
    eps_mark = first_connected(eps_values, eps_conn)

    eps_edge_ax.set_xlabel('$\\epsilon$', fontsize=16)
    draw_column(eps_err_ax, eps_edge_ax, eps_values,
                eps_errors, eps_edges, eps_mark, -0.1)

    # Unit-spaced x ticks on the kNN panels.
    for ax in (knn_err_ax, knn_edge_ax):
        lo, hi = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(lo, hi + 1))

    fig.tight_layout()
    savefig('sensitivity.png')
def make_test_data(verify=True):
    """Generate a noise-free swiss roll and its normalized 2-D ground truth.

    The ground truth pairs ``theta`` with the second column of ``X``; each
    column is shifted to start at 0 and then divided by its maximum.

    Args:
        verify: when true, data sets are redrawn until the 1-nearest-neighbour
            graph (``symmetrize=False``) has an ``error_ratio`` below 1e-10.
            When false, the first draw is returned unchecked.

    Returns:
        Tuple ``(X, GT)``.
    """
    while True:
        X, theta = swiss_roll(18, 500, radius=4.8, return_theta=True,
                              theta_noise=0, radius_noise=0)
        GT = np.hstack((theta[:, None], X[:, 1:2]))
        # Column-wise rescale, done in place.
        GT -= GT.min(axis=0)
        GT /= GT.max(axis=0)

        # ensure our test_data fits our 1-NN assumption (skipped entirely
        # when verification is off)
        if not verify or error_ratio(
                neighbor_graph(X, k=1, symmetrize=False), GT) < 1e-10:
            return X, GT
def evaluate_sensitivity(X, GT):
    """Plot how graph scores react to the kNN and epsilon-ball parameters.

    ``eval_method`` is run over k = 1..6 (``neighbor_graph`` with
    ``symmetrize=False``) and over 50 evenly spaced epsilon values in
    [0.2, 1.2].  The 2x2 figure saved as ``sensitivity.png`` shows
    ``errors * 100`` in the top row and ``edges`` in the bottom row, with kNN
    in the left column and epsilon-ball in the right.  Each panel gets a
    dashed vertical line at the first parameter whose ``conn`` entry is at
    most 1.

    NOTE(review): that lookup assumes ``conn`` is non-increasing along the
    parameter range (presumably a connected-component count) -- confirm
    against ``eval_method``.

    Args:
        X: data points handed to ``neighbor_graph``.
        GT: ground truth forwarded to ``eval_method``.
    """
    def first_connected(values, conn):
        # Reversed, conn is searched as an ascending sequence; the number of
        # entries <= 1 tells how far from the end the first such entry sits.
        n_at_most_one = np.searchsorted(conn[::-1], 1, side='right')
        return values[len(conn) - n_at_most_one]

    def draw_column(err_ax, edge_ax, values, errors, edges, mark, y_floor):
        err_ax.plot(values, errors * 100, 'k+-')
        err_ax.axvline(mark, color='k', linestyle='--')
        # Pin the lower y-limit while keeping the automatic upper one.
        err_ax.set_ylim((y_floor, err_ax.get_ylim()[1]))
        edge_ax.plot(values, edges, 'k+-')
        edge_ax.axvline(mark, color='k', linestyle='--')

    knn_values = np.arange(1, 7)
    eps_values = np.linspace(0.2, 1.2, 50)

    # --- kNN column -------------------------------------------------------
    knn_errors, knn_edges, knn_conn = eval_method(
        knn_values,
        lambda k: neighbor_graph(X, k=k, symmetrize=False),
        GT)
    knn_mark = first_connected(knn_values, knn_conn)

    fig, axes = pyplot.subplots(nrows=2, ncols=2)
    (knn_err_ax, eps_err_ax), (knn_edge_ax, eps_edge_ax) = axes

    knn_err_ax.set_ylabel('Edge error %', fontsize=14)
    knn_edge_ax.set_xlabel('$k$', fontsize=16)
    knn_edge_ax.set_ylabel('Total edges', fontsize=14)
    draw_column(knn_err_ax, knn_edge_ax, knn_values,
                knn_errors, knn_edges, knn_mark, -0.05)

    # --- epsilon-ball column ----------------------------------------------
    eps_errors, eps_edges, eps_conn = eval_method(
        eps_values,
        lambda eps: neighbor_graph(X, epsilon=eps),
        GT)
    eps_mark = first_connected(eps_values, eps_conn)

    eps_edge_ax.set_xlabel('$\\epsilon$', fontsize=16)
    draw_column(eps_err_ax, eps_edge_ax, eps_values,
                eps_errors, eps_edges, eps_mark, -0.1)

    # Unit-spaced x ticks on the kNN panels.
    for ax in (knn_err_ax, knn_edge_ax):
        lo, hi = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(lo, hi + 1))

    fig.tight_layout()
    savefig('sensitivity.png')
def make_test_data(verify=True):
    """Generate a noise-free swiss roll and its normalized 2-D ground truth.

    The ground truth pairs ``theta`` with the second column of ``X``; each
    column is shifted to start at 0 and then divided by its maximum.

    Args:
        verify: when true, data sets are redrawn until the 1-nearest-neighbour
            graph (``symmetrize=False``) has an ``error_ratio`` below 1e-10.
            When false, the first draw is returned unchecked.

    Returns:
        Tuple ``(X, GT)``.
    """
    while True:
        X, theta = swiss_roll(18, 500, radius=4.8, return_theta=True,
                              theta_noise=0, radius_noise=0)
        GT = np.hstack((theta[:, None], X[:, 1:2]))
        # Column-wise rescale, done in place.
        GT -= GT.min(axis=0)
        GT /= GT.max(axis=0)

        # ensure our test_data fits our 1-NN assumption (skipped entirely
        # when verification is off)
        if not verify or error_ratio(
                neighbor_graph(X, k=1, symmetrize=False), GT) < 1e-10:
            return X, GT
def show_skeleton_issue():
    """Draw the skeleton graph of a small random arc into ``skeleton.png``.

    25 points are placed at ``(cos t, u, sin t)`` for ``t`` evenly spaced in
    [0, 4] and ``u`` uniform in [-1, 1].  A 1-nearest-neighbour graph is
    grown with ``grow_trees`` and the points are grouped by
    ``join_CCs_simple``.  The graph is drawn in ``(t, u)`` coordinates with a
    different marker per group and all axis ticks and tick labels hidden.
    """
    from itertools import cycle

    t = np.linspace(0, 4, 25)[:, None]
    X = np.hstack((np.cos(t), np.random.uniform(-1, 1, t.shape), np.sin(t)))
    GT = np.hstack((t, X[:, 1:2]))

    W = neighbor_graph(X, k=1, symmetrize=False)
    W = grow_trees(X, W, 2)
    labels = join_CCs_simple(X, W)

    # switch up the CC order for better contrast between groups
    order = np.arange(labels.max() + 1)
    np.random.shuffle(order)
    labels = order[labels]

    show_neighbor_graph(GT, W, vertex_style=None, edge_style='k-')
    ax = pyplot.gca()
    # Markers are cycled so that groups beyond the seventh are still drawn
    # (a plain zip against the 7-character string would silently skip them).
    for label, marker in zip(np.unique(labels), cycle("osD^v><")):
        scatterplot(GT[labels == label], marker, ax=ax, edgecolor='k',
                    c='white')

    # Booleans rather than 'off': current Matplotlib treats the string as
    # truthy, which would leave the ticks and labels visible.
    ax.tick_params(which='both', bottom=False, top=False, left=False,
                   right=False, labelbottom=False, labelleft=False)
    savefig('skeleton.png')
def show_skeleton_issue():
    """Draw the skeleton graph of a small random arc into ``skeleton.png``.

    25 points are placed at ``(cos t, u, sin t)`` for ``t`` evenly spaced in
    [0, 4] and ``u`` uniform in [-1, 1].  A 1-nearest-neighbour graph is
    grown with ``grow_trees`` and the points are grouped by
    ``join_CCs_simple``.  The graph is drawn in ``(t, u)`` coordinates with a
    different marker per group and all axis ticks and tick labels hidden.
    """
    from itertools import cycle

    t = np.linspace(0, 4, 25)[:, None]
    X = np.hstack((np.cos(t), np.random.uniform(-1, 1, t.shape), np.sin(t)))
    GT = np.hstack((t, X[:, 1:2]))

    W = neighbor_graph(X, k=1, symmetrize=False)
    W = grow_trees(X, W, 2)
    labels = join_CCs_simple(X, W)

    # switch up the CC order for better contrast between groups
    order = np.arange(labels.max() + 1)
    np.random.shuffle(order)
    labels = order[labels]

    show_neighbor_graph(GT, W, vertex_style=None, edge_style='k-')
    ax = pyplot.gca()
    # Markers are cycled so that groups beyond the seventh are still drawn
    # (a plain zip against the 7-character string would silently skip them).
    for label, marker in zip(np.unique(labels), cycle("osD^v><")):
        scatterplot(GT[labels == label], marker, ax=ax, edgecolor='k',
                    c='white')

    # Booleans rather than 'off': current Matplotlib treats the string as
    # truthy, which would leave the ticks and labels visible.
    ax.tick_params(which='both', bottom=False, top=False, left=False,
                   right=False, labelbottom=False, labelleft=False)
    savefig('skeleton.png')
def manifold_spanning_graph(X, embed_dim, num_ccs=1, verbose=False):
    """Build the manifold spanning graph (MSG) for the points in ``X``.

    Pipeline: symmetrized 1-nearest-neighbour graph -> ``grow_trees`` ->
    ``join_CCs`` -> (only when ``num_ccs == 1``) ``flesh_out``.

    Args:
        X: data points.
        embed_dim: manifold dimension handed to every stage;
            ``flesh_out`` also receives ``min_shortcircuit=embed_dim + 1``.
        num_ccs: forwarded to ``join_CCs``.  Any value other than 1 skips
            ``flesh_out`` and prints the size of each connected component.
        verbose: forwarded to ``grow_trees``, ``join_CCs`` and ``flesh_out``.

    Returns:
        The resulting graph ``W``.
    """
    W = grow_trees(X, neighbor_graph(X, k=1, symmetrize=True), embed_dim,
                   verbose=verbose)
    CC_labels, angle_thresh = join_CCs(X, W, embed_dim, num_ccs=num_ccs,
                                       verbose=verbose)

    if num_ccs == 1:
        return flesh_out(X, W, embed_dim, CC_labels,
                         angle_thresh=angle_thresh,
                         min_shortcircuit=embed_dim + 1,
                         verbose=verbose)

    # Several components requested: only report their sizes.  Fleshing out
    # each component separately is deliberately not done here -- that step
    # is often counterproductive for >1 CC.
    n_found, labels = connected_components(W, directed=False,
                                           return_labels=True)
    for cc in xrange(n_found):
        # Single pre-formatted argument: prints the same text whether
        # `print` is a statement or a function.
        print('CC %s has size %s' % (cc, np.count_nonzero(labels == cc)))
    return W
if __name__ == '__main__':
    # simple usage example / visual test case
    #
    # Builds a kNN graph on synthetic cylinder data, times several embedding
    # methods on it, and shows the neighbor graph in the original space.
    # Imports are local to the script path so they are not needed when the
    # module is merely imported.
    from matplotlib import pyplot
    from viz import show_neighbor_graph
    from util import Timer
    from correspondence import Correspondence
    from synthetic_data import cylinder

    n = 300      # number of sample positions passed to cylinder()
    knn = 5      # neighbor count for every graph-based method below
    out_dim = 2  # number of vectors requested from each method

    X = cylinder(np.linspace(0, 4, n))
    W = neighbor_graph(X=X, k=knn)
    corr = Correspondence(matrix=W)

    # NOTE(review): the embeddings computed below are not referenced again in
    # the visible part of this script -- presumably they are plotted further
    # down; confirm before removing any of them.
    with Timer('LapEig'):
        le_embed = lapeig(W=W, num_vecs=out_dim)
    with Timer('Linear LapEig'):
        # lapeig_linear returns a projector, not an embedding
        lel_embed = np.dot(X, lapeig_linear(X=X, W=W, num_vecs=out_dim, k=knn))
    with Timer('Isomap'):
        im_embed = isomap(X=X, num_vecs=out_dim, k=knn)
    with Timer('LLE'):
        lle_embed = lle(X=X, num_vecs=out_dim, k=knn)
    with Timer('SFA'):
        # applied the same way as the lapeig_linear result: data times the
        # returned matrix
        sfa_embed = np.dot(X, slow_features(X=X, num_vecs=out_dim))

    show_neighbor_graph(X, corr, 'Original space')
if __name__ == "__main__":
    # simple usage example / visual test case
    #
    # Builds a kNN graph on synthetic cylinder data, times several embedding
    # methods on it, and shows the neighbor graph in the original space.
    # Imports are local to the script path so they are not needed when the
    # module is merely imported.
    from matplotlib import pyplot
    from viz import show_neighbor_graph
    from util import Timer
    from correspondence import Correspondence
    from synthetic_data import cylinder

    n = 300      # number of sample positions passed to cylinder()
    knn = 5      # neighbor count for every graph-based method below
    out_dim = 2  # number of vectors requested from each method

    X = cylinder(np.linspace(0, 4, n))
    W = neighbor_graph(X=X, k=knn)
    corr = Correspondence(matrix=W)

    # NOTE(review): the embeddings computed below are not referenced again in
    # the visible part of this script -- presumably they are plotted further
    # down; confirm before removing any of them.
    with Timer("LapEig"):
        le_embed = lapeig(W=W, num_vecs=out_dim)
    with Timer("Linear LapEig"):
        # lapeig_linear returns a projector, not an embedding
        lel_embed = np.dot(X, lapeig_linear(X=X, W=W, num_vecs=out_dim, k=knn))
    with Timer("Isomap"):
        im_embed = isomap(X=X, num_vecs=out_dim, k=knn)
    with Timer("LLE"):
        lle_embed = lle(X=X, num_vecs=out_dim, k=knn)
    with Timer("SFA"):
        # applied the same way as the lapeig_linear result: data times the
        # returned matrix
        sfa_embed = np.dot(X, slow_features(X=X, num_vecs=out_dim))

    show_neighbor_graph(X, corr, "Original space")
t = np.linspace(0, 5, n) if three_d: X = swiss_roll(t, lambda A: np.sin(A)**2) Y = np.vstack((np.sin(t)**2, t, np.zeros(n))).T else: X = spiral(t) Y = X[:, (1, 0)] # swap x and y axes return add_noise(X, 0.05), add_noise(Y, 0.05) if __name__ == '__main__': n = 500 d = 3 X, Y = gen_data(n, d == 3) corr = Correspondence(matrix=np.eye(n)) Wx = neighbor_graph(X, k=5) Wy = neighbor_graph(Y, k=5) lin_aligners = ( ('no alignment', lambda: TrivialAlignment(X, Y)), ('affine', lambda: Affine(X, Y, corr, d)), ('procrustes', lambda: Procrustes(X, Y, corr, d)), ('cca', lambda: CCA(X, Y, corr, d)), ('cca_v2', lambda: CCAv2(X, Y, d)), ('linear manifold', lambda: ManifoldLinear(X, Y, corr, d, Wx, Wy)), ('ctw', lambda: ctw(X, Y, d)[1]), ('manifold warping', lambda: manifold_warping_linear(X, Y, d, Wx, Wy)[1]), ) other_aligners = (
warp_inds[i] = P[j, 1] return A[warp_inds] def _bound_row(self): P = self.pairs() n = P.shape[0] B = np.zeros((P[-1, 0] + 1, 2), dtype=np.int) head = 0 while head < n: i = P[head, 0] tail = head + 1 while tail < n and P[tail, 0] == i: tail += 1 B[i, :] = P[(head, tail - 1), 1] head = tail return B if __name__ == '__main__': # simple sanity-check tests from neighborhood import neighbor_graph from viz import show_neighbor_graph, pyplot n = 500 data = np.random.uniform(-1, 1, (n, 2)) corr_k = Correspondence(matrix=neighbor_graph(data, k=3)) corr_eps = Correspondence(matrix=neighbor_graph(data, epsilon=0.01)) pyplot.subplot(1, 2, 1) show_neighbor_graph(data, corr_k, 'kNN graph, k = 3') pyplot.subplot(1, 2, 2) show_neighbor_graph(data, corr_eps, '$\epsilon$-ball graph, $\epsilon$ = 0.1')()
rf = r['load'](file) dayExpr = pandas2ri.ri2py_dataframe(r['dayExpr']) nightExpr = pandas2ri.ri2py_dataframe(r['nightExpr']) X = dayExpr.as_matrix() # datWorm.iloc[:, 0:datWorm.shape[1]].as_matrix() Y = nightExpr.as_matrix() # datFly.iloc[:, 0:datFly.shape[1]].as_matrix() n = 17695 d = 3 X_normalized = preprocessing.normalize(X, norm='l2').T Y_normalized = preprocessing.normalize(Y, norm='l2')[0:13, :].T corr = Correspondence( matrix=np.eye(n)) # Correspondence(matrix=corr.as_matrix()) Wx = neighbor_graph(X_normalized, k=5) Wy = neighbor_graph(Y_normalized, k=5) lin_aligners = ( ('no alignment', lambda: TrivialAlignment(X_normalized, Y_normalized, d)), # ('affine', lambda: Affine(X,Y,corr,d)), # ('procrustes', lambda: Procrustes(X,Y,corr,d)), ('cca', lambda: CCA(X_normalized, Y_normalized, corr, d)), # ('cca_v2', lambda: CCAv2(X,Y,d)), ('linear manifold', lambda: ManifoldLinear(X_normalized, Y_normalized, corr, d, Wx, Wy)), ('ctw', lambda: ctw(X_normalized, Y_normalized, d)[1]), ('manifold warping', lambda: manifold_warping_linear(X_normalized, Y_normalized, d, Wx, Wy)[1] ), )
warp_inds[i] = P[j,1] return A[warp_inds] def _bound_row(self): P = self.pairs() n = P.shape[0] B = np.zeros((P[-1,0]+1,2),dtype=np.int) head = 0 while head < n: i = P[head,0] tail = head+1 while tail < n and P[tail,0] == i: tail += 1 B[i,:] = P[(head,tail-1),1] head = tail return B if __name__ == '__main__': # simple sanity-check tests from neighborhood import neighbor_graph from viz import show_neighbor_graph, pyplot n = 500 data = np.random.uniform(-1,1,(n,2)) corr_k = Correspondence(matrix=neighbor_graph(data,k=3)) corr_eps = Correspondence(matrix=neighbor_graph(data,epsilon=0.01)) pyplot.subplot(1,2,1) show_neighbor_graph(data,corr_k,'kNN graph, k = 3') pyplot.subplot(1,2,2) show_neighbor_graph(data, corr_eps, '$\epsilon$-ball graph, $\epsilon$ = 0.1')()
t = np.linspace(0,5,n) if three_d: X = swiss_roll(t,lambda A: np.sin(A)**2) Y = np.vstack((np.sin(t)**2,t,np.zeros(n))).T else: X = spiral(t) Y = X[:,(1,0)] # swap x and y axes return add_noise(X,0.05), add_noise(Y,0.05) if __name__ == '__main__': n = 500 d = 2 X,Y = gen_data(n, d==3) corr = Correspondence(matrix=np.eye(n)) Wx = neighbor_graph(X,k=5) Wy = neighbor_graph(Y,k=5) lin_aligners = ( ('no alignment', lambda: TrivialAlignment(X,Y)), ('affine', lambda: Affine(X,Y,corr,d)), ('procrustes', lambda: Procrustes(X,Y,corr,d)), ('cca', lambda: CCA(X,Y,corr,d)), ('cca_v2', lambda: CCAv2(X,Y,d)), ('linear manifold', lambda: ManifoldLinear(X,Y,corr,d,Wx,Wy)), ('ctw', lambda: ctw(X,Y,d)[1]), ('manifold warping', lambda: manifold_warping_linear(X,Y,d,Wx,Wy)[1]), ) other_aligners = ( ('dtw', lambda: (X, dtw(X,Y).warp(X))),