예제 #1
0
def mnist_eucl_proc(digits, num_points, num_avg):
    """Print the mean k-means accuracy of five distance/data combinations.

    Each of the ``num_avg`` rounds draws a fresh sample with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), clusters
    it into ``k`` groups once per combination listed below, and scores every
    clustering with ``kmeans.accuracy`` against the labels returned by
    ``pick_data``.  The mean score of each combination is printed at the end.

    Combinations, in evaluation and report order:

    * ``d_E``     -- ``originals`` with the norm of the difference
    * ``d_{P_0}`` -- ``ext_shapes`` with ``procrustes.procrustes``
    * ``d_{P_3}`` -- ``shapes`` with ``procrustes.procrustes3(a, b, 50)``
    * ``d_{P}``   -- ``shapes`` with ``procrustes.procrustes``
    * ``d_{P_l}`` -- ``shapes`` with ``procrustes.procrustes2``

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of sampling/clustering rounds to average over.

    Returns:
        None.  Results are written to standard output only.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist1(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist2(a, b):
        return procrustes.procrustes2(a, b)

    def proc_dist3(a, b):
        # NOTE(review): the meaning of the constant 50 is defined by
        # procrustes.procrustes3 and is not visible here.
        return procrustes.procrustes3(a, b, 50)

    k = len(digits)

    # (report label, index into (originals, shapes, ext_shapes), distance).
    runs = [
        ("d_E", 0, eucl_dist),
        ("d_{P_0}", 2, proc_dist1),
        ("d_{P_3}", 1, proc_dist3),
        ("d_{P}", 1, proc_dist1),
        ("d_{P_l}", 1, proc_dist2),
    ]
    scores = [[] for _run in runs]

    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        datasets = (originals, shapes, ext_shapes)

        # All clusterings run before any scoring so the sequence of calls
        # into kmeans stays the same as in the hand-unrolled version.
        assignments = []
        for _name, data_idx, dist in runs:
            assigned, _, _, _ = kmeans.kmeans_(k, datasets[data_idx], dist)
            assignments.append(assigned)

        for bucket, assigned in zip(scores, assignments):
            bucket.append(kmeans.accuracy(labels, assigned))

    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    for (name, _data_idx, _dist), bucket in zip(runs, scores):
        print("%s = %f" % (name, np.mean(bucket)))
예제 #2
0
def mnist_eucl_proc(digits, num_points, num_avg):
    """Print the mean k-means accuracy of five distance/data combinations.

    Each of the ``num_avg`` rounds draws a fresh sample with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), clusters
    it into ``k`` groups once per combination listed below, and scores every
    clustering with ``kmeans.accuracy`` against the labels returned by
    ``pick_data``.  The mean score of each combination is printed at the end.

    Combinations, in evaluation and report order:

    * ``d_E``     -- ``originals`` with the norm of the difference
    * ``d_{P_0}`` -- ``ext_shapes`` with ``procrustes.procrustes``
    * ``d_{P_3}`` -- ``shapes`` with ``procrustes.procrustes3(a, b, 50)``
    * ``d_{P}``   -- ``shapes`` with ``procrustes.procrustes``
    * ``d_{P_l}`` -- ``shapes`` with ``procrustes.procrustes2``

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of sampling/clustering rounds to average over.

    Returns:
        None.  Results are written to standard output only.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist1(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist2(a, b):
        return procrustes.procrustes2(a, b)

    def proc_dist3(a, b):
        # NOTE(review): the meaning of the constant 50 is defined by
        # procrustes.procrustes3 and is not visible here.
        return procrustes.procrustes3(a, b, 50)

    k = len(digits)

    # (report label, index into (originals, shapes, ext_shapes), distance).
    runs = [
        ("d_E", 0, eucl_dist),
        ("d_{P_0}", 2, proc_dist1),
        ("d_{P_3}", 1, proc_dist3),
        ("d_{P}", 1, proc_dist1),
        ("d_{P_l}", 1, proc_dist2),
    ]
    scores = [[] for _run in runs]

    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        datasets = (originals, shapes, ext_shapes)

        # All clusterings run before any scoring so the sequence of calls
        # into kmeans stays the same as in the hand-unrolled version.
        assignments = []
        for _name, data_idx, dist in runs:
            assigned, _, _, _ = kmeans.kmeans_(k, datasets[data_idx], dist)
            assignments.append(assigned)

        for bucket, assigned in zip(scores, assignments):
            bucket.append(kmeans.accuracy(labels, assigned))

    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    for (name, _data_idx, _dist), bucket in zip(runs, scores):
        print("%s = %f" % (name, np.mean(bucket)))
예제 #3
0
def clustering_eucl(nrange, digits, num_sample, outfile):
    """Plot Euclidean k-means accuracy on ``originals`` and ``shapes``.

    For every ``n`` in ``nrange`` the function draws ``num_sample`` samples
    with ``pick_data([n] * k, digits)`` (``k = len(digits)``), clusters the
    returned ``originals`` and ``shapes`` into ``k`` groups with the norm of
    the difference as distance, and scores both clusterings with
    ``kmeans.accuracy``.  Each individual score is drawn as a dot, the mean
    score per ``n`` as a line, and the figure is saved to ``outfile``.

    Args:
        nrange: Indexable, re-iterable sequence of sample sizes.  It is
            iterated twice and ``nrange[-1]`` is read for the progress
            message.
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters and its items appear in the plot title.
        num_sample: Number of samples drawn per value of ``n``.
        outfile: Destination passed to ``fig.savefig``.

    Returns:
        None.  Progress is printed and the figure is written to ``outfile``.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    k = len(digits)

    # (index into (originals, shapes), colour, legend label) per series.
    series = [
        (0, 'b', r'$d_E$'),
        (1, 'r', r'$d_{E_b}$'),
    ]
    samples = [[] for _entry in series]

    for n in nrange:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Doing %i of %i" % (n, nrange[-1]))

        ns = [n] * k
        for _trial in range(num_sample):
            originals, shapes, ext_shapes, labels = pick_data(ns, digits)
            datasets = (originals, shapes)

            # Cluster everything first, then score, keeping the call order
            # into kmeans unchanged.
            assignments = []
            for data_idx, _colour, _legend in series:
                assigned, _, _, _ = kmeans.kmeans_(k, datasets[data_idx],
                                                   eucl_dist)
                assignments.append(assigned)

            accuracies = [kmeans.accuracy(labels, assigned)
                          for assigned in assignments]
            for bucket, acc in zip(samples, accuracies):
                bucket.append([n, acc])

            # Same text as the former ``print '    ', ac1, ac2``: a
            # four-space item followed by space-separated values.
            print("     " + " ".join(str(acc) for acc in accuracies))

    points = [np.array(bucket) for bucket in samples]

    # plotting results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for pts, (_data_idx, colour, legend) in zip(points, series):
        ax.plot(pts[:, 0], pts[:, 1], 'o', color=colour, alpha=.5,
                label=legend)

    # Mean accuracy per sample size, one [n, mean] row per value of n.
    averages = [[] for _entry in series]
    for n in nrange:
        for bucket, pts in zip(averages, points):
            bucket.append([n, pts[pts[:, 0] == n, 1].mean()])
    averages = [np.array(bucket) for bucket in averages]

    for avg, (_data_idx, colour, _legend) in zip(averages, series):
        ax.plot(avg[:, 0], avg[:, 1], '-', color=colour)

    ax.set_xlabel(r'$N_i$')
    ax.set_ylabel(r'$A$')
    leg = ax.legend(loc=0)
    leg.get_frame().set_alpha(0.6)
    ax.set_title(r'$\{%s\}$' % (','.join(str(d) for d in digits)))
    fig.savefig(outfile)
예제 #4
0
def clustering_eucl(nrange, digits, num_sample, outfile):
    """Plot Euclidean k-means accuracy on ``originals`` and ``shapes``.

    For every ``n`` in ``nrange`` the function draws ``num_sample`` samples
    with ``pick_data([n] * k, digits)`` (``k = len(digits)``), clusters the
    returned ``originals`` and ``shapes`` into ``k`` groups with the norm of
    the difference as distance, and scores both clusterings with
    ``kmeans.accuracy``.  Each individual score is drawn as a dot, the mean
    score per ``n`` as a line, and the figure is saved to ``outfile``.

    Args:
        nrange: Indexable, re-iterable sequence of sample sizes.  It is
            iterated twice and ``nrange[-1]`` is read for the progress
            message.
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters and its items appear in the plot title.
        num_sample: Number of samples drawn per value of ``n``.
        outfile: Destination passed to ``fig.savefig``.

    Returns:
        None.  Progress is printed and the figure is written to ``outfile``.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    k = len(digits)

    # (index into (originals, shapes), colour, legend label) per series.
    series = [
        (0, 'b', r'$d_E$'),
        (1, 'r', r'$d_{E_b}$'),
    ]
    samples = [[] for _entry in series]

    for n in nrange:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Doing %i of %i" % (n, nrange[-1]))

        ns = [n] * k
        for _trial in range(num_sample):
            originals, shapes, ext_shapes, labels = pick_data(ns, digits)
            datasets = (originals, shapes)

            # Cluster everything first, then score, keeping the call order
            # into kmeans unchanged.
            assignments = []
            for data_idx, _colour, _legend in series:
                assigned, _, _, _ = kmeans.kmeans_(k, datasets[data_idx],
                                                   eucl_dist)
                assignments.append(assigned)

            accuracies = [kmeans.accuracy(labels, assigned)
                          for assigned in assignments]
            for bucket, acc in zip(samples, accuracies):
                bucket.append([n, acc])

            # Same text as the former ``print '    ', ac1, ac2``: a
            # four-space item followed by space-separated values.
            print("     " + " ".join(str(acc) for acc in accuracies))

    points = [np.array(bucket) for bucket in samples]

    # plotting results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for pts, (_data_idx, colour, legend) in zip(points, series):
        ax.plot(pts[:, 0], pts[:, 1], 'o', color=colour, alpha=.5,
                label=legend)

    # Mean accuracy per sample size, one [n, mean] row per value of n.
    averages = [[] for _entry in series]
    for n in nrange:
        for bucket, pts in zip(averages, points):
            bucket.append([n, pts[pts[:, 0] == n, 1].mean()])
    averages = [np.array(bucket) for bucket in averages]

    for avg, (_data_idx, colour, _legend) in zip(averages, series):
        ax.plot(avg[:, 0], avg[:, 1], '-', color=colour)

    ax.set_xlabel(r'$N_i$')
    ax.set_ylabel(r'$A$')
    leg = ax.legend(loc=0)
    leg.get_frame().set_alpha(0.6)
    ax.set_title(r'$\{%s\}$' % (','.join(str(d) for d in digits)))
    fig.savefig(outfile)
def kmeans_procrustes(k, data, true_labels):
    """Cluster ``data`` with k-means under ``distance.procrustes``.

    Args:
        k: Number of clusters requested from ``kmeans.kmeans_``.
        data: Items to cluster; forwarded unchanged to ``kmeans.kmeans_``.
        true_labels: Reference labels compared against the clustering.

    Returns:
        The value of ``clusval.class_error(true_labels, labels)`` for the
        labels produced by the clustering.
    """

    def dist_func(X, Y):
        d = distance.procrustes(X, Y)
        # Every evaluated distance is echoed to stdout.  The parenthesised
        # form is valid on both Python 2 and Python 3.
        print(d)
        return d

    # NOTE(review): 30 is the fourth positional argument of kmeans_
    # (presumably an iteration cap -- confirm against kmeans.kmeans_).
    labels, _, _, _ = kmeans.kmeans_(k, data, dist_func, 30)
    return clusval.class_error(true_labels, labels)
예제 #6
0
def kmeans_procrustes(k, data, true_labels):
    """Cluster ``data`` with k-means under ``distance.procrustes``.

    Args:
        k: Number of clusters requested from ``kmeans.kmeans_``.
        data: Items to cluster; forwarded unchanged to ``kmeans.kmeans_``.
        true_labels: Reference labels compared against the clustering.

    Returns:
        The value of ``clusval.class_error(true_labels, labels)`` for the
        labels produced by the clustering.
    """

    def dist_func(X, Y):
        d = distance.procrustes(X, Y)
        # Every evaluated distance is echoed to stdout.  The parenthesised
        # form is valid on both Python 2 and Python 3.
        print(d)
        return d

    # NOTE(review): 30 is the fourth positional argument of kmeans_
    # (presumably an iteration cap -- confirm against kmeans.kmeans_).
    labels, _, _, _ = kmeans.kmeans_(k, data, dist_func, 30)
    return clusval.class_error(true_labels, labels)
예제 #7
0
def mnist_procrustes(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy using ``procrustes.procrustes``.

    Each of the ``num_avg`` rounds samples data with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), clusters
    the returned ``shapes`` into ``k`` groups and scores the result with
    ``kmeans.accuracy`` against the returned labels.

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of rounds to run and average over.

    Returns:
        None.  Each round's accuracy, a blank line and the mean are printed.
    """

    def proc_dist(a, b):
        return procrustes.procrustes(a, b)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        assigned, _, _, _ = kmeans.kmeans_(k, shapes, proc_dist)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        print(accu)
    # print("") emits one empty line on Python 2 and 3; a bare ``print`` is
    # a no-op expression on Python 3 and ``print()`` shows "()" on Python 2.
    print("")
    print("d_{P} = %f" % np.mean(accuracies))
예제 #8
0
def mnist_euclidean(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy using Euclidean distance.

    Each of the ``num_avg`` rounds samples data with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), clusters
    the returned ``originals`` into ``k`` groups with the norm of the
    difference as distance, and scores the result with ``kmeans.accuracy``
    against the returned labels.

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of rounds to run and average over.

    Returns:
        None.  Each round's accuracy, a blank line and the mean are printed.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        assigned, _, _, _ = kmeans.kmeans_(k, originals, eucl_dist)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        print(accu)
    # print("") emits one empty line on Python 2 and 3; a bare ``print`` is
    # a no-op expression on Python 3 and ``print()`` shows "()" on Python 2.
    print("")
    print("d_E = %f" % np.mean(accuracies))
예제 #9
0
def mnist_procrustes(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy using ``procrustes.procrustes``.

    Each of the ``num_avg`` rounds samples data with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), clusters
    the returned ``shapes`` into ``k`` groups and scores the result with
    ``kmeans.accuracy`` against the returned labels.

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of rounds to run and average over.

    Returns:
        None.  Each round's accuracy, a blank line and the mean are printed.
    """

    def proc_dist(a, b):
        return procrustes.procrustes(a, b)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        assigned, _, _, _ = kmeans.kmeans_(k, shapes, proc_dist)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        print(accu)
    # print("") emits one empty line on Python 2 and 3; a bare ``print`` is
    # a no-op expression on Python 3 and ``print()`` shows "()" on Python 2.
    print("")
    print("d_{P} = %f" % np.mean(accuracies))
예제 #10
0
def mnist_euclidean(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy using Euclidean distance.

    Each of the ``num_avg`` rounds samples data with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), clusters
    the returned ``originals`` into ``k`` groups with the norm of the
    difference as distance, and scores the result with ``kmeans.accuracy``
    against the returned labels.

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of rounds to run and average over.

    Returns:
        None.  Each round's accuracy, a blank line and the mean are printed.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        assigned, _, _, _ = kmeans.kmeans_(k, originals, eucl_dist)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        print(accu)
    # print("") emits one empty line on Python 2 and 3; a bare ``print`` is
    # a no-op expression on Python 3 and ``print()`` shows "()" on Python 2.
    print("")
    print("d_E = %f" % np.mean(accuracies))
예제 #11
0
def mnist_procrustes_filling(digits, num_points, num_avg):
    """Print mean k-means accuracy for Euclidean, Procrustes and filling distances.

    Each of the ``num_avg`` rounds samples data with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), runs three
    clusterings into ``k`` groups and scores each with ``kmeans.accuracy``
    against the returned labels.  The means are printed at the end.

    Combinations, in evaluation and report order:

    * ``d_{E}`` -- ``originals`` with the norm of the difference
    * ``d_{P}`` -- ``ext_shapes`` with ``procrustes.procrustes``
    * ``d_{F}`` -- ``ext_shapes`` with
      ``fill.procrustes_filling(a, b, N=40, scale=200)``

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of rounds to average over.

    Returns:
        None.  Results are written to standard output only.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist_filling(a, b):
        # NOTE(review): N=40 and scale=200 are fixed here; their meaning is
        # defined by fill.procrustes_filling and is not visible in this file.
        return fill.procrustes_filling(a, b, N=40, scale=200)

    k = len(digits)

    # (report label, index into (originals, ext_shapes), distance).
    runs = [
        ("d_{E}", 0, eucl_dist),
        ("d_{P}", 1, proc_dist),
        ("d_{F}", 1, proc_dist_filling),
    ]
    scores = [[] for _run in runs]

    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        datasets = (originals, ext_shapes)

        # Cluster everything first, then score, keeping the call order into
        # kmeans unchanged.
        assignments = []
        for _name, data_idx, dist in runs:
            assigned, _, _, _ = kmeans.kmeans_(k, datasets[data_idx], dist)
            assignments.append(assigned)

        for bucket, assigned in zip(scores, assignments):
            bucket.append(kmeans.accuracy(labels, assigned))

    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    for (name, _data_idx, _dist), bucket in zip(runs, scores):
        print("%s = %f" % (name, np.mean(bucket)))
예제 #12
0
def mnist_procrustes_filling(digits, num_points, num_avg):
    """Print mean k-means accuracy for Euclidean, Procrustes and filling distances.

    Each of the ``num_avg`` rounds samples data with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), runs three
    clusterings into ``k`` groups and scores each with ``kmeans.accuracy``
    against the returned labels.  The means are printed at the end.

    Combinations, in evaluation and report order:

    * ``d_{E}`` -- ``originals`` with the norm of the difference
    * ``d_{P}`` -- ``ext_shapes`` with ``procrustes.procrustes``
    * ``d_{F}`` -- ``ext_shapes`` with
      ``fill.procrustes_filling(a, b, N=40, scale=200)``

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of rounds to average over.

    Returns:
        None.  Results are written to standard output only.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist_filling(a, b):
        # NOTE(review): N=40 and scale=200 are fixed here; their meaning is
        # defined by fill.procrustes_filling and is not visible in this file.
        return fill.procrustes_filling(a, b, N=40, scale=200)

    k = len(digits)

    # (report label, index into (originals, ext_shapes), distance).
    runs = [
        ("d_{E}", 0, eucl_dist),
        ("d_{P}", 1, proc_dist),
        ("d_{F}", 1, proc_dist_filling),
    ]
    scores = [[] for _run in runs]

    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        datasets = (originals, ext_shapes)

        # Cluster everything first, then score, keeping the call order into
        # kmeans unchanged.
        assignments = []
        for _name, data_idx, dist in runs:
            assigned, _, _, _ = kmeans.kmeans_(k, datasets[data_idx], dist)
            assignments.append(assigned)

        for bucket, assigned in zip(scores, assignments):
            bucket.append(kmeans.accuracy(labels, assigned))

    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    for (name, _data_idx, _dist), bucket in zip(runs, scores):
        print("%s = %f" % (name, np.mean(bucket)))
예제 #13
0
def mnist_alignment(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy using ``fill.euclidean_alignment``.

    Each of the ``num_avg`` rounds samples data with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), clusters
    the returned ``originals`` into ``k`` groups with
    ``fill.euclidean_alignment`` as distance, and scores the result with
    ``kmeans.accuracy`` against the returned labels.

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of rounds to run and average over.

    Returns:
        None.  Each round's accuracy, a blank line and the mean are printed.
    """

    def dist_func(im1, im2):
        return fill.euclidean_alignment(im1, im2)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        assigned, _, _, _ = kmeans.kmeans_(k, originals, dist_func)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        print(accu)
    # print("") emits one empty line on Python 2 and 3; a bare ``print`` is
    # a no-op expression on Python 3 and ``print()`` shows "()" on Python 2.
    print("")
    print("d_A = %f" % np.mean(accuracies))
예제 #14
0
def mnist_alignment(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy using ``fill.euclidean_alignment``.

    Each of the ``num_avg`` rounds samples data with
    ``pick_data([num_points] * k, digits)`` (``k = len(digits)``), clusters
    the returned ``originals`` into ``k`` groups with
    ``fill.euclidean_alignment`` as distance, and scores the result with
    ``kmeans.accuracy`` against the returned labels.

    Args:
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters.
        num_points: Value repeated ``k`` times to build the first argument
            of ``pick_data`` (presumably samples per digit -- confirm
            against ``pick_data``).
        num_avg: Number of rounds to run and average over.

    Returns:
        None.  Each round's accuracy, a blank line and the mean are printed.
    """

    def dist_func(im1, im2):
        return fill.euclidean_alignment(im1, im2)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        assigned, _, _, _ = kmeans.kmeans_(k, originals, dist_func)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        print(accu)
    # print("") emits one empty line on Python 2 and 3; a bare ``print`` is
    # a no-op expression on Python 3 and ``print()`` shows "()" on Python 2.
    print("")
    print("d_A = %f" % np.mean(accuracies))
예제 #15
0
def kmeans_euclidean(k, data, true_labels):
    """Cluster ``data`` with k-means under ``distance.euclidean``.

    Args:
        k: Number of clusters requested from ``kmeans.kmeans_``.
        data: Items to cluster; forwarded unchanged to ``kmeans.kmeans_``.
        true_labels: Reference labels compared against the clustering.

    Returns:
        The value of ``clusval.class_error(true_labels, labels)`` for the
        labels produced by the clustering.
    """
    # NOTE(review): 30 is the fourth positional argument of kmeans_
    # (presumably an iteration cap -- confirm against kmeans.kmeans_).
    predicted, _centres, _objective, _iterations = kmeans.kmeans_(
        k, data, distance.euclidean, 30)
    return clusval.class_error(true_labels, predicted)
예제 #16
0
def mnist_standard_vs_procrustes(nrange, digits, num_sample, outfile):
    """Plot accuracy when clustering MNIST digits, using procrustes
    and Euclidean distance.

    For every ``n`` in ``nrange`` the function draws ``num_sample`` samples
    with ``pick_data([n] * k, digits)`` (``k = len(digits)``), runs five
    k-means clusterings into ``k`` groups and scores each one with
    ``kmeans.accuracy`` against the returned labels.  Every individual score
    is drawn as a dot, the mean score per ``n`` as a line of the same
    colour, and the figure is saved to ``outfile``.

    Series, in evaluation and plotting order:

    * ``d_E``     (blue)    -- ``originals`` with the norm of the difference
    * ``d_{P_0}`` (red)     -- ``ext_shapes`` with ``procrustes.procrustes``
    * ``d_{P_3}`` (green)   -- ``shapes`` with
      ``procrustes.procrustes3(a, b, 50)``
    * ``d_{P}``   (cyan)    -- ``shapes`` with ``procrustes.procrustes``
    * ``d_{P_l}`` (magenta) -- ``shapes`` with ``procrustes.procrustes2``

    Args:
        nrange: Indexable, re-iterable sequence of sample sizes.  It is
            iterated twice and ``nrange[-1]`` is read for the progress
            message.
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters and its items appear in the plot title.
        num_sample: Number of samples drawn per value of ``n``.
        outfile: Destination passed to ``fig.savefig``.

    Returns:
        None.  Progress is printed and the figure is written to ``outfile``.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist1(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist2(a, b):
        return procrustes.procrustes2(a, b)

    def proc_dist3(a, b):
        # NOTE(review): the meaning of the constant 50 is defined by
        # procrustes.procrustes3 and is not visible here.
        return procrustes.procrustes3(a, b, 50)

    k = len(digits)

    # (index into (originals, shapes, ext_shapes), distance, colour, legend).
    series = [
        (0, eucl_dist, 'b', r'$d_E$'),
        (2, proc_dist1, 'r', r'$d_{P_0}$'),
        (1, proc_dist3, 'g', r'$d_{P_3}$'),
        (1, proc_dist1, 'c', r'$d_{P}$'),
        (1, proc_dist2, 'm', r'$d_{P_l}$'),
    ]
    samples = [[] for _entry in series]

    for n in nrange:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Doing %i of %i" % (n, nrange[-1]))

        ns = [n] * k
        for _trial in range(num_sample):
            originals, shapes, ext_shapes, labels = pick_data(ns, digits)
            datasets = (originals, shapes, ext_shapes)

            # Cluster everything first, then score, keeping the call order
            # into kmeans unchanged.
            assignments = []
            for data_idx, dist, _colour, _legend in series:
                assigned, _, _, _ = kmeans.kmeans_(k, datasets[data_idx],
                                                   dist)
                assignments.append(assigned)

            accuracies = [kmeans.accuracy(labels, assigned)
                          for assigned in assignments]
            for bucket, acc in zip(samples, accuracies):
                bucket.append([n, acc])

            # Same text as the former multi-item print statement: a
            # four-space item followed by space-separated values.
            print("     " + " ".join(str(acc) for acc in accuracies))

    points = [np.array(bucket) for bucket in samples]

    # plotting results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for pts, (_data_idx, _dist, colour, legend) in zip(points, series):
        ax.plot(pts[:, 0], pts[:, 1], 'o', color=colour, alpha=.5,
                label=legend)

    # Mean accuracy per sample size, one [n, mean] row per value of n.
    averages = [[] for _entry in series]
    for n in nrange:
        for bucket, pts in zip(averages, points):
            bucket.append([n, pts[pts[:, 0] == n, 1].mean()])
    averages = [np.array(bucket) for bucket in averages]

    for avg, (_data_idx, _dist, colour, _legend) in zip(averages, series):
        ax.plot(avg[:, 0], avg[:, 1], '-', color=colour)

    ax.set_xlabel(r'$N_i$')
    ax.set_ylabel(r'$A$')
    leg = ax.legend(loc=0)
    leg.get_frame().set_alpha(0.6)
    ax.set_title(r'$\{%s\}$' % (','.join(str(d) for d in digits)))
    fig.savefig(outfile)
def kmeans_euclidean(k, data, true_labels):
    """Cluster ``data`` with k-means under ``distance.euclidean``.

    Args:
        k: Number of clusters requested from ``kmeans.kmeans_``.
        data: Items to cluster; forwarded unchanged to ``kmeans.kmeans_``.
        true_labels: Reference labels compared against the clustering.

    Returns:
        The value of ``clusval.class_error(true_labels, labels)`` for the
        labels produced by the clustering.
    """
    # NOTE(review): 30 is the fourth positional argument of kmeans_
    # (presumably an iteration cap -- confirm against kmeans.kmeans_).
    predicted, _centres, _objective, _iterations = kmeans.kmeans_(
        k, data, distance.euclidean, 30)
    return clusval.class_error(true_labels, predicted)
예제 #18
0
def mnist_standard_vs_procrustes(nrange, digits, num_sample, outfile):
    """Plot accuracy when clustering MNIST digits, using procrustes
    and Euclidean distance.

    For every ``n`` in ``nrange`` the function draws ``num_sample`` samples
    with ``pick_data([n] * k, digits)`` (``k = len(digits)``), runs five
    k-means clusterings into ``k`` groups and scores each one with
    ``kmeans.accuracy`` against the returned labels.  Every individual score
    is drawn as a dot, the mean score per ``n`` as a line of the same
    colour, and the figure is saved to ``outfile``.

    Series, in evaluation and plotting order:

    * ``d_E``     (blue)    -- ``originals`` with the norm of the difference
    * ``d_{P_0}`` (red)     -- ``ext_shapes`` with ``procrustes.procrustes``
    * ``d_{P_3}`` (green)   -- ``shapes`` with
      ``procrustes.procrustes3(a, b, 50)``
    * ``d_{P}``   (cyan)    -- ``shapes`` with ``procrustes.procrustes``
    * ``d_{P_l}`` (magenta) -- ``shapes`` with ``procrustes.procrustes2``

    Args:
        nrange: Indexable, re-iterable sequence of sample sizes.  It is
            iterated twice and ``nrange[-1]`` is read for the progress
            message.
        digits: Sequence handed to ``pick_data``; its length is the number
            of clusters and its items appear in the plot title.
        num_sample: Number of samples drawn per value of ``n``.
        outfile: Destination passed to ``fig.savefig``.

    Returns:
        None.  Progress is printed and the figure is written to ``outfile``.
    """

    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist1(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist2(a, b):
        return procrustes.procrustes2(a, b)

    def proc_dist3(a, b):
        # NOTE(review): the meaning of the constant 50 is defined by
        # procrustes.procrustes3 and is not visible here.
        return procrustes.procrustes3(a, b, 50)

    k = len(digits)

    # (index into (originals, shapes, ext_shapes), distance, colour, legend).
    series = [
        (0, eucl_dist, 'b', r'$d_E$'),
        (2, proc_dist1, 'r', r'$d_{P_0}$'),
        (1, proc_dist3, 'g', r'$d_{P_3}$'),
        (1, proc_dist1, 'c', r'$d_{P}$'),
        (1, proc_dist2, 'm', r'$d_{P_l}$'),
    ]
    samples = [[] for _entry in series]

    for n in nrange:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Doing %i of %i" % (n, nrange[-1]))

        ns = [n] * k
        for _trial in range(num_sample):
            originals, shapes, ext_shapes, labels = pick_data(ns, digits)
            datasets = (originals, shapes, ext_shapes)

            # Cluster everything first, then score, keeping the call order
            # into kmeans unchanged.
            assignments = []
            for data_idx, dist, _colour, _legend in series:
                assigned, _, _, _ = kmeans.kmeans_(k, datasets[data_idx],
                                                   dist)
                assignments.append(assigned)

            accuracies = [kmeans.accuracy(labels, assigned)
                          for assigned in assignments]
            for bucket, acc in zip(samples, accuracies):
                bucket.append([n, acc])

            # Same text as the former multi-item print statement: a
            # four-space item followed by space-separated values.
            print("     " + " ".join(str(acc) for acc in accuracies))

    points = [np.array(bucket) for bucket in samples]

    # plotting results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for pts, (_data_idx, _dist, colour, legend) in zip(points, series):
        ax.plot(pts[:, 0], pts[:, 1], 'o', color=colour, alpha=.5,
                label=legend)

    # Mean accuracy per sample size, one [n, mean] row per value of n.
    averages = [[] for _entry in series]
    for n in nrange:
        for bucket, pts in zip(averages, points):
            bucket.append([n, pts[pts[:, 0] == n, 1].mean()])
    averages = [np.array(bucket) for bucket in averages]

    for avg, (_data_idx, _dist, colour, _legend) in zip(averages, series):
        ax.plot(avg[:, 0], avg[:, 1], '-', color=colour)

    ax.set_xlabel(r'$N_i$')
    ax.set_ylabel(r'$A$')
    leg = ax.legend(loc=0)
    leg.get_frame().set_alpha(0.6)
    ax.set_title(r'$\{%s\}$' % (','.join(str(d) for d in digits)))
    fig.savefig(outfile)