def compute_Ws(X, num_ccs):
    """Compute the distance matrix plus the knn, b-matching and MSG graphs.

    Steps, each wrapped in a `Timer` and each result saved with `np.save`
    to a ``mnist_*.npy`` file in the current working directory:

    1. squared-Euclidean pairwise distances of `X`;
    2. a symmetrized k-nn graph, using the first k in 1..9 whose graph has
       at most `num_ccs` connected components;
    3. a b-matching graph from `hacky_b_matching`, called with that same k;
    4. a manifold spanning graph from `manifold_spanning_graph(X, 2, ...)`.

    NOTE(review): this name is defined a second time further down in the
    file; the later definition replaces this one at import time.

    Args:
        X: data handed to `pairwise_distances` and `manifold_spanning_graph`.
        num_ccs: largest acceptable number of connected components.

    Returns:
        The tuple ``(D, Wknn, Wbma, Wmsg)``.

    Raises:
        AssertionError: if no k in 1..9 gives few enough components.
    """
    # NOTE(review): the original indentation was lost; the np.save calls are
    # assumed to sit outside the timed sections -- confirm.
    with Timer('Calculating pairwise distances...'):
        D = pairwise_distances(X, metric='sqeuclidean')
    np.save('mnist_D.npy', D)

    # k-nn
    with Timer('Calculating knn graph...'):
        for k in range(1, 10):
            Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
            n = connected_components(Wknn, directed=False,
                                     return_labels=False)
            if n <= num_ccs:
                break
        else:
            # Raised explicitly: an `assert False` here is removed under
            # `python -O`, which would let a too-fragmented graph through.
            raise AssertionError('k too low')
    np.save('mnist_Wknn.npy', Wknn)
    # Single parenthesized argument prints the same on Python 2 and 3.
    print('knn (k=%d)' % k)

    # b-matching
    with Timer('Calculating b-matching graph...'):
        # using 8 decimal places kills the disk
        Wbma = hacky_b_matching(D, k, fmt='%.1f')
    np.save('mnist_Wbma.npy', Wbma)

    # msg
    with Timer('Calculating MSG graph...'):
        Wmsg = manifold_spanning_graph(X, 2, num_ccs=num_ccs)
    np.save('mnist_Wmsg.npy', Wmsg)

    return D, Wknn, Wbma, Wmsg
def average_error_rate(num_trials=100):
    """Box-plot the per-trial results of four graph construction methods.

    Every trial fetches a dataset from `make_test_data(verify=True)`,
    computes squared-Euclidean distances and builds four graphs in turn:
    k-nn (k=5, symmetrized), epsilon (epsilon=1.0), b-matching (5) and
    MSG (2).  For each graph the pair returned by
    `error_ratio(W, GT, return_tuple=True)` is recorded.

    The first elements of those pairs are plotted to
    ``average_error.png`` and the second elements to
    ``average_edges.png``, one box per method.

    NOTE(review): this name is defined a second time further down in the
    file; the later definition replaces this one at import time.

    Args:
        num_trials: number of trials to run.
    """
    method_labels = ('$k$-nearest', '$\\epsilon$-close', '$b$-matching',
                     'MSG')
    # One (num_trials, 2) table per method, in the same order as the labels.
    tables = [np.empty((num_trials, 2)) for _ in method_labels]

    for trial in xrange(num_trials):
        X, GT = make_test_data(verify=True)
        D = pairwise_distances(X, metric='sqeuclidean')
        # Builders are invoked one at a time below, so each graph is built
        # and scored before the next one is constructed.
        builders = (
            lambda: neighbor_graph(D, precomputed=True, k=5, symmetrize=True),
            lambda: neighbor_graph(D, precomputed=True, epsilon=1.0),
            lambda: hacky_b_matching(D, 5),
            lambda: manifold_spanning_graph(X, 2),
        )
        for table, build in zip(tables, builders):
            table[trial] = error_ratio(build(), GT, return_tuple=True)

    errors = np.hstack([table[:, :1] for table in tables])
    edges = np.hstack([table[:, 1:] for table in tables])

    def _boxplot(data, out_file, lower_pad=0):
        # Draw one box per method on a fresh figure and save it.
        pyplot.figure(figsize=(5, 6))
        axes = pyplot.gca()
        axes.boxplot(data, widths=0.75)
        axes.set_xticklabels(method_labels, fontsize=12)
        if lower_pad:
            # Extend only the lower y-limit by `lower_pad`.
            lo, hi = pyplot.ylim()
            pyplot.ylim((lo - lower_pad, hi))
        savefig(out_file)

    _boxplot(errors, 'average_error.png', lower_pad=1)
    _boxplot(edges, 'average_edges.png')
def average_error_rate(num_trials=100):
    """Run repeated trials of four graph builders and box-plot the results.

    Per trial: get `(X, GT)` from `make_test_data(verify=True)`, compute
    the squared-Euclidean distance matrix, then build and score (via
    `error_ratio(..., return_tuple=True)`) a symmetrized 5-nn graph, an
    epsilon=1.0 graph, a b-matching graph (5) and an MSG graph (2).

    Two figures are saved: ``average_error.png`` from the first element of
    each recorded pair, ``average_edges.png`` from the second.

    Args:
        num_trials: how many trials to run.
    """
    labels = ('$k$-nearest', '$\\epsilon$-close', '$b$-matching', 'MSG')
    KNN, EPS, BMA, MSG = range(4)
    # stats[trial, method] holds the pair returned by error_ratio.
    stats = np.empty((num_trials, len(labels), 2))

    for trial in xrange(num_trials):
        X, GT = make_test_data(verify=True)
        D = pairwise_distances(X, metric='sqeuclidean')

        graph = neighbor_graph(D, precomputed=True, k=5, symmetrize=True)
        stats[trial, KNN] = error_ratio(graph, GT, return_tuple=True)

        graph = neighbor_graph(D, precomputed=True, epsilon=1.0)
        stats[trial, EPS] = error_ratio(graph, GT, return_tuple=True)

        graph = hacky_b_matching(D, 5)
        stats[trial, BMA] = error_ratio(graph, GT, return_tuple=True)

        graph = manifold_spanning_graph(X, 2)
        stats[trial, MSG] = error_ratio(graph, GT, return_tuple=True)

    # Each slice is (num_trials, 4): one column per method, in label order.
    errors = stats[:, :, 0]
    edges = stats[:, :, 1]

    # First figure: first element of each pair, lower y-limit dropped by 1.
    pyplot.figure(figsize=(5, 6))
    error_axes = pyplot.gca()
    error_axes.boxplot(errors, widths=0.75)
    error_axes.set_xticklabels(labels, fontsize=12)
    bottom, top = pyplot.ylim()
    pyplot.ylim((bottom - 1, top))
    savefig('average_error.png')

    # Second figure: second element of each pair, default y-limits.
    pyplot.figure(figsize=(5, 6))
    edge_axes = pyplot.gca()
    edge_axes.boxplot(edges, widths=0.75)
    edge_axes.set_xticklabels(labels, fontsize=12)
    savefig('average_edges.png')
def compute_Ws(X, num_ccs):
    """Build, time and save the distance matrix and three graphs for `X`.

    The knn graph is searched over k = 1..9 and the first k whose
    symmetrized graph has no more than `num_ccs` connected components is
    kept; that k is then passed on to `hacky_b_matching`.  The MSG graph
    comes from `manifold_spanning_graph(X, 2, num_ccs=num_ccs)`.

    Files written to the current working directory: ``mnist_D.npy``,
    ``mnist_Wknn.npy``, ``mnist_Wbma.npy`` and ``mnist_Wmsg.npy``.

    Args:
        X: data handed to `pairwise_distances` and `manifold_spanning_graph`.
        num_ccs: upper bound on the number of connected components.

    Returns:
        ``(D, Wknn, Wbma, Wmsg)`` as a tuple.

    Raises:
        AssertionError: when every k in 1..9 leaves too many components.
    """
    # NOTE(review): indentation of the original was not recoverable; saving
    # and printing are assumed to happen outside the Timer blocks -- confirm.
    with Timer('Calculating pairwise distances...'):
        D = pairwise_distances(X, metric='sqeuclidean')
    np.save('mnist_D.npy', D)

    # k-nn
    with Timer('Calculating knn graph...'):
        for k in range(1, 10):
            Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
            n_components = connected_components(
                Wknn, directed=False, return_labels=False)
            if n_components <= num_ccs:
                break
        else:
            # An `assert False` would be compiled away under `python -O`
            # and the function would continue with an unusable graph, so
            # the failure is raised explicitly (same exception type).
            raise AssertionError('k too low')
    np.save('mnist_Wknn.npy', Wknn)
    # Parenthesized single argument: identical output on Python 2 and 3.
    print('knn (k=%d)' % k)

    # b-matching
    with Timer('Calculating b-matching graph...'):
        # using 8 decimal places kills the disk
        Wbma = hacky_b_matching(D, k, fmt='%.1f')
    np.save('mnist_Wbma.npy', Wbma)

    # msg
    with Timer('Calculating MSG graph...'):
        Wmsg = manifold_spanning_graph(X, 2, num_ccs=num_ccs)
    np.save('mnist_Wmsg.npy', Wmsg)

    return D, Wknn, Wbma, Wmsg
def swiss_roll_experiment():
    """Compare four graph constructions on the data from `make_test_data`.

    After calling `plot_canonical_roll` and `evaluate_sensitivity` on the
    data, three parameterized methods are each swept until their graph
    has exactly one connected component:

    * k-nn, k in 3..9 (symmetrized);
    * epsilon-ball, 50 evenly spaced values from 0.4 to 1.2;
    * b-matching, b in 3..9.

    For each, and for the MSG graph built with `embed_dim`, the value of
    `error_ratio(W, GT)` is printed and `plot_roll` is called with an
    output filename of the form ``swiss_*_result.png``.

    NOTE(review): this name is defined a second time further down in the
    file; the later definition replaces this one at import time.

    Raises:
        AssertionError: if a sweep ends without reaching one component.
    """
    embed_dim = 2
    X, GT = make_test_data(verify=True)
    plot_canonical_roll(X, GT)
    evaluate_sensitivity(X, GT)

    # The sweeps below raise explicitly instead of using `assert False`,
    # which is stripped under `python -O` and would let the experiment
    # continue with a disconnected graph.

    # kNN
    D = pairwise_distances(X, metric='sqeuclidean')
    for k in range(3, 10):
        Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
        n = connected_components(Wknn, directed=False, return_labels=False)
        if n == 1:
            break
    else:
        raise AssertionError('k too low')
    # %s applies str() to each value, matching the old print statement.
    print('k: %s error: %s' % (k, error_ratio(Wknn, GT)))
    plot_roll(Wknn, X, GT[:, 0], embed_dim, 'swiss_knn_result.png')

    # eball
    for eps in np.linspace(0.4, 1.2, 50):
        Weps = neighbor_graph(D, precomputed=True, epsilon=eps,
                              symmetrize=False)
        n = connected_components(Weps, directed=False, return_labels=False)
        if n == 1:
            break
    else:
        raise AssertionError('eps too low')
    print('eps: %s error: %s' % (eps, error_ratio(Weps, GT)))
    plot_roll(Weps, X, GT[:, 0], embed_dim, 'swiss_eps_result.png')

    # b-matching
    for b in range(3, 10):
        Wbma = hacky_b_matching(D, b)
        n = connected_components(Wbma, directed=False, return_labels=False)
        if n == 1:
            break
    else:
        raise AssertionError('b too low')
    print('b: %s error: %s' % (b, error_ratio(Wbma, GT)))
    plot_roll(Wbma, X, GT[:, 0], embed_dim, 'swiss_bma_result.png')

    # MSG
    Wmsg = manifold_spanning_graph(X, embed_dim)
    print('MSG error: %s' % (error_ratio(Wmsg, GT),))
    plot_roll(Wmsg, X, GT[:, 0], embed_dim, 'swiss_msg_result.png')
def swiss_roll_experiment():
    """Run the swiss-roll comparison of knn, epsilon, b-matching and MSG.

    Data comes from `make_test_data(verify=True)` and is first passed to
    `plot_canonical_roll` and `evaluate_sensitivity`.  For k-nn (k in
    3..9), epsilon-ball (50 values from 0.4 to 1.2) and b-matching (b in
    3..9), the first parameter value whose graph forms a single connected
    component is used.  Each resulting graph, plus the MSG graph, has its
    `error_ratio` printed and is handed to `plot_roll` together with a
    ``swiss_*_result.png`` filename.

    Raises:
        AssertionError: if a parameter sweep never reaches one component.
    """
    def _first_connected(candidates, build, failure_msg):
        # Return (value, graph) for the first candidate whose graph has
        # exactly one connected component.
        for value in candidates:
            graph = build(value)
            count = connected_components(graph, directed=False,
                                         return_labels=False)
            if count == 1:
                return value, graph
        # Explicit raise rather than `assert False`: asserts are removed
        # under `python -O`, which would hide an exhausted sweep.
        raise AssertionError(failure_msg)

    def _report(*fields):
        # Space-joined str() of every field: the same text the Python 2
        # print statement emitted, but valid on Python 3 as well.
        print(' '.join(str(field) for field in fields))

    embed_dim = 2
    X, GT = make_test_data(verify=True)
    plot_canonical_roll(X, GT)
    evaluate_sensitivity(X, GT)
    D = pairwise_distances(X, metric='sqeuclidean')

    # kNN
    k, Wknn = _first_connected(
        range(3, 10),
        lambda k: neighbor_graph(D, precomputed=True, k=k, symmetrize=True),
        'k too low')
    _report('k:', k, 'error:', error_ratio(Wknn, GT))
    plot_roll(Wknn, X, GT[:, 0], embed_dim, 'swiss_knn_result.png')

    # eball
    eps, Weps = _first_connected(
        np.linspace(0.4, 1.2, 50),
        lambda eps: neighbor_graph(D, precomputed=True, epsilon=eps,
                                   symmetrize=False),
        'eps too low')
    _report('eps:', eps, 'error:', error_ratio(Weps, GT))
    plot_roll(Weps, X, GT[:, 0], embed_dim, 'swiss_eps_result.png')

    # b-matching
    b, Wbma = _first_connected(
        range(3, 10),
        lambda b: hacky_b_matching(D, b),
        'b too low')
    _report('b:', b, 'error:', error_ratio(Wbma, GT))
    plot_roll(Wbma, X, GT[:, 0], embed_dim, 'swiss_bma_result.png')

    # MSG
    Wmsg = manifold_spanning_graph(X, embed_dim)
    _report('MSG error:', error_ratio(Wmsg, GT))
    plot_roll(Wmsg, X, GT[:, 0], embed_dim, 'swiss_msg_result.png')