def compute_Ws(X, num_ccs):
  """Compute the distance matrix and three neighbor graphs for X, saving each.

  Every result is written with np.save as soon as it is available
  ('mnist_D.npy', 'mnist_Wknn.npy', 'mnist_Wbma.npy', 'mnist_Wmsg.npy'),
  and each stage runs inside a Timer context.

  X: data handed to pairwise_distances and manifold_spanning_graph.
  num_ccs: largest number of connected components accepted for the kNN
    graph; also forwarded to manifold_spanning_graph.

  Returns the tuple (D, Wknn, Wbma, Wmsg).
  Raises AssertionError when no k in 1..9 gives a kNN graph with at most
  num_ccs connected components.
  """
  with Timer('Calculating pairwise distances...'):
    D = pairwise_distances(X, metric='sqeuclidean')
  np.save('mnist_D.npy', D)

  # k-nn: keep the smallest k whose symmetrized graph is connected enough
  with Timer('Calculating knn graph...'):
    for k in range(1, 10):
      Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
      n = connected_components(Wknn, directed=False, return_labels=False)
      if n <= num_ccs:
        break
    else:
      # Raised explicitly: an `assert False` is removed under `python -O`,
      # and the code below would then silently save and reuse the k=9 graph.
      raise AssertionError('k too low')
  np.save('mnist_Wknn.npy', Wknn)
  # Single-argument form prints the same text on Python 2 and Python 3.
  print('knn (k=%d)' % k)

  # b-matching, reusing the k selected above
  with Timer('Calculating b-matching graph...'):
    # using 8 decimal places kills the disk
    Wbma = hacky_b_matching(D, k, fmt='%.1f')
  np.save('mnist_Wbma.npy', Wbma)

  # msg
  with Timer('Calculating MSG graph...'):
    Wmsg = manifold_spanning_graph(X, 2, num_ccs=num_ccs)
  np.save('mnist_Wmsg.npy', Wmsg)

  return D, Wknn, Wbma, Wmsg
def average_error_rate(num_trials=100):
  """Box-plot per-trial scores of four graph constructions.

  Each trial draws a dataset with make_test_data(verify=True), builds a
  k-nearest (k=5), epsilon-close (epsilon=1.0), b-matching (b=5) and MSG
  graph from it, and scores every graph against the ground truth with
  error_ratio(..., return_tuple=True).  The first score column is plotted
  to 'average_error.png' and the second to 'average_edges.png'
  (presumably error and edge count -- confirm against error_ratio).

  num_trials: number of datasets to draw.
  """
  labels = ('$k$-nearest','$\\epsilon$-close','$b$-matching','MSG')
  # One (num_trials, 2) table per method, in the same order as `labels`.
  tables = [np.empty((num_trials,2)) for _ in labels]
  knn_tab, eps_tab, bma_tab, msg_tab = tables

  for trial in xrange(num_trials):
    X, GT = make_test_data(verify=True)
    D = pairwise_distances(X, metric='sqeuclidean')

    graph = neighbor_graph(D, precomputed=True, k=5, symmetrize=True)
    knn_tab[trial] = error_ratio(graph, GT, return_tuple=True)

    graph = neighbor_graph(D, precomputed=True, epsilon=1.0)
    eps_tab[trial] = error_ratio(graph, GT, return_tuple=True)

    graph = hacky_b_matching(D, 5)
    bma_tab[trial] = error_ratio(graph, GT, return_tuple=True)

    graph = manifold_spanning_graph(X, 2)
    msg_tab[trial] = error_ratio(graph, GT, return_tuple=True)

  # Slicing with :1 / 1: keeps each column 2-D so hstack yields (trials, 4).
  errors = np.hstack([tab[:,:1] for tab in tables])
  edges = np.hstack([tab[:,1:] for tab in tables])

  def boxplot_to_file(columns, fname, extend_bottom):
    # One box per method on a fresh 5x6 figure, then hand off to savefig.
    pyplot.figure(figsize=(5,6))
    axes = pyplot.gca()
    axes.boxplot(columns, widths=0.75)
    axes.set_xticklabels(labels, fontsize=12)
    if extend_bottom:
      lo, hi = pyplot.ylim()
      pyplot.ylim((lo-1, hi))
    savefig(fname)

  boxplot_to_file(errors, 'average_error.png', True)
  boxplot_to_file(edges, 'average_edges.png', False)
Example #3
0
def average_error_rate(num_trials=100):
    """Compare four graph builders over repeated trials and save box plots.

    For every trial a dataset comes from make_test_data(verify=True); each
    builder's graph is scored with error_ratio(..., return_tuple=True) and
    the two returned values are stored per trial.  Column 0 of the scores
    is drawn into 'average_error.png', column 1 into 'average_edges.png'.
    NOTE(review): the column meanings (error, edge count) are inferred from
    the variable and file names -- confirm against error_ratio.

    num_trials: how many datasets to draw.
    """
    # Builders are listed in plot order; each receives the points and their
    # squared-Euclidean distance matrix and uses whichever one it needs.
    builders = (
        lambda pts, dist: neighbor_graph(
            dist, precomputed=True, k=5, symmetrize=True),
        lambda pts, dist: neighbor_graph(dist, precomputed=True, epsilon=1.0),
        lambda pts, dist: hacky_b_matching(dist, 5),
        lambda pts, dist: manifold_spanning_graph(pts, 2),
    )
    labels = ('$k$-nearest', '$\\epsilon$-close', '$b$-matching', 'MSG')
    scores = [np.empty((num_trials, 2)) for _ in builders]

    for i in xrange(num_trials):
        X, GT = make_test_data(verify=True)
        D = pairwise_distances(X, metric='sqeuclidean')
        for build, table in zip(builders, scores):
            table[i] = error_ratio(build(X, D), GT, return_tuple=True)

    # Keep the slices 2-D so the stacked result has one column per method.
    errors = np.hstack([table[:, :1] for table in scores])
    edges = np.hstack([table[:, 1:] for table in scores])

    # First score column, with one extra unit of room below the boxes.
    pyplot.figure(figsize=(5, 6))
    error_axes = pyplot.gca()
    error_axes.boxplot(errors, widths=0.75)
    error_axes.set_xticklabels(labels, fontsize=12)
    bottom, top = pyplot.ylim()
    pyplot.ylim((bottom - 1, top))
    savefig('average_error.png')

    # Second score column, default y-limits.
    pyplot.figure(figsize=(5, 6))
    edge_axes = pyplot.gca()
    edge_axes.boxplot(edges, widths=0.75)
    edge_axes.set_xticklabels(labels, fontsize=12)
    savefig('average_edges.png')
Example #4
0
def compute_Ws(X, num_ccs):
    """Build and persist the distance matrix plus kNN, b-matching and MSG graphs.

    Stages run inside Timer contexts and each result is stored with np.save
    right after it is produced: 'mnist_D.npy', 'mnist_Wknn.npy',
    'mnist_Wbma.npy' and 'mnist_Wmsg.npy'.

    X: input passed to pairwise_distances and manifold_spanning_graph.
    num_ccs: upper bound on the connected components of the kNN graph; it
        is also passed on to manifold_spanning_graph.

    Returns (D, Wknn, Wbma, Wmsg).
    Raises AssertionError if every k in 1..9 leaves the kNN graph with more
    than num_ccs connected components.
    """
    with Timer('Calculating pairwise distances...'):
        D = pairwise_distances(X, metric='sqeuclidean')
    np.save('mnist_D.npy', D)

    # k-nn: stop at the first k that is connected enough
    with Timer('Calculating knn graph...'):
        for k in range(1, 10):
            Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
            n = connected_components(Wknn, directed=False, return_labels=False)
            if n <= num_ccs:
                break
        else:
            # Explicit raise instead of `assert False`: asserts are dropped
            # under `python -O`, which would let the k=9 graph slip through.
            raise AssertionError('k too low')
    np.save('mnist_Wknn.npy', Wknn)
    # One parenthesized argument prints identically on Python 2 and 3.
    print('knn (k=%d)' % k)

    # b-matching with the k chosen by the search above
    with Timer('Calculating b-matching graph...'):
        # using 8 decimal places kills the disk
        Wbma = hacky_b_matching(D, k, fmt='%.1f')
    np.save('mnist_Wbma.npy', Wbma)

    # msg
    with Timer('Calculating MSG graph...'):
        Wmsg = manifold_spanning_graph(X, 2, num_ccs=num_ccs)
    np.save('mnist_Wmsg.npy', Wmsg)

    return D, Wknn, Wbma, Wmsg
Example #5
0
def swiss_roll_experiment():
    """Run the swiss-roll comparison of kNN, epsilon-ball, b-matching and MSG.

    Draws one dataset with make_test_data(verify=True), calls
    plot_canonical_roll and evaluate_sensitivity on it, then for each of
    kNN (k in 3..9), epsilon-ball (50 values from 0.4 to 1.2) and
    b-matching (b in 3..9) takes the first parameter whose graph has a
    single connected component.  For every method the parameter and
    error_ratio are printed and plot_roll writes 'swiss_<method>_result.png'.
    The MSG graph is built directly with no parameter search.

    Raises AssertionError ('k too low', 'eps too low' or 'b too low') when
    a search range is exhausted without reaching one connected component.
    """
    embed_dim = 2
    X, GT = make_test_data(verify=True)

    plot_canonical_roll(X, GT)
    evaluate_sensitivity(X, GT)

    # The failed-search branches below raise explicitly rather than using
    # `assert False`: asserts are removed under `python -O`, which would let
    # a still-disconnected graph be scored and plotted.

    # kNN
    D = pairwise_distances(X, metric='sqeuclidean')
    for k in range(3, 10):
        Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
        n = connected_components(Wknn, directed=False, return_labels=False)
        if n == 1:
            break
    else:
        raise AssertionError('k too low')
    # Single formatted strings print the same text on Python 2 and 3.
    print('k: %s error: %s' % (k, error_ratio(Wknn, GT)))
    plot_roll(Wknn, X, GT[:, 0], embed_dim, 'swiss_knn_result.png')

    # eball
    for eps in np.linspace(0.4, 1.2, 50):
        Weps = neighbor_graph(D,
                              precomputed=True,
                              epsilon=eps,
                              symmetrize=False)
        n = connected_components(Weps, directed=False, return_labels=False)
        if n == 1:
            break
    else:
        raise AssertionError('eps too low')
    print('eps: %s error: %s' % (eps, error_ratio(Weps, GT)))
    plot_roll(Weps, X, GT[:, 0], embed_dim, 'swiss_eps_result.png')

    # b-matching
    for b in range(3, 10):
        Wbma = hacky_b_matching(D, b)
        n = connected_components(Wbma, directed=False, return_labels=False)
        if n == 1:
            break
    else:
        raise AssertionError('b too low')
    print('b: %s error: %s' % (b, error_ratio(Wbma, GT)))
    plot_roll(Wbma, X, GT[:, 0], embed_dim, 'swiss_bma_result.png')

    # MSG
    Wmsg = manifold_spanning_graph(X, embed_dim)
    # 1-tuple so a tuple-valued error would still format as one item.
    print('MSG error: %s' % (error_ratio(Wmsg, GT),))
    plot_roll(Wmsg, X, GT[:, 0], embed_dim, 'swiss_msg_result.png')
def swiss_roll_experiment():
  """Compare kNN, epsilon-ball, b-matching and MSG graphs on one swiss roll.

  A dataset from make_test_data(verify=True) is first passed to
  plot_canonical_roll and evaluate_sensitivity.  kNN (k in 3..9),
  epsilon-ball (50 values from 0.4 to 1.2) and b-matching (b in 3..9) each
  keep the first parameter that yields exactly one connected component;
  MSG is built once without a search.  Each method prints its error_ratio
  (and chosen parameter, where there is one) and plot_roll saves
  'swiss_knn_result.png', 'swiss_eps_result.png', 'swiss_bma_result.png'
  and 'swiss_msg_result.png'.

  Raises AssertionError with 'k too low', 'eps too low' or 'b too low'
  when the corresponding search never reaches one connected component.
  """
  embed_dim = 2
  X, GT = make_test_data(verify=True)

  plot_canonical_roll(X, GT)
  evaluate_sensitivity(X, GT)

  # kNN
  D = pairwise_distances(X, metric='sqeuclidean')
  for k in range(3, 10):
    Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
    n = connected_components(Wknn, directed=False, return_labels=False)
    if n == 1:
      break
  else:
    # Explicit raise: `assert False` is stripped by `python -O`, after which
    # the disconnected k=9 graph would be reported as the result.
    raise AssertionError('k too low')
  # One formatted string gives the same output on Python 2 and Python 3.
  print('k: %s error: %s' % (k, error_ratio(Wknn, GT)))
  plot_roll(Wknn, X, GT[:,0], embed_dim, 'swiss_knn_result.png')

  # eball
  for eps in np.linspace(0.4, 1.2, 50):
    Weps = neighbor_graph(D, precomputed=True, epsilon=eps, symmetrize=False)
    n = connected_components(Weps, directed=False, return_labels=False)
    if n == 1:
      break
  else:
    # Same reasoning as the kNN search: must not vanish under -O.
    raise AssertionError('eps too low')
  print('eps: %s error: %s' % (eps, error_ratio(Weps, GT)))
  plot_roll(Weps, X, GT[:,0], embed_dim, 'swiss_eps_result.png')

  # b-matching
  for b in range(3, 10):
    Wbma = hacky_b_matching(D, b)
    n = connected_components(Wbma, directed=False, return_labels=False)
    if n == 1:
      break
  else:
    # Same reasoning as the kNN search: must not vanish under -O.
    raise AssertionError('b too low')
  print('b: %s error: %s' % (b, error_ratio(Wbma, GT)))
  plot_roll(Wbma, X, GT[:,0], embed_dim, 'swiss_bma_result.png')

  # MSG
  Wmsg = manifold_spanning_graph(X, embed_dim)
  # Wrapped in a 1-tuple so a tuple-valued error still formats as one item.
  print('MSG error: %s' % (error_ratio(Wmsg, GT),))
  plot_roll(Wmsg, X, GT[:,0], embed_dim, 'swiss_msg_result.png')