Example #1
0
def local_kmeans_class(I, L, x, k):
    """Classify queries with the local k-means rule for neighbourhood sizes 1..k.

    For every query row and every class, the ``j`` training samples of that
    class closest to the query are averaged into a local centre
    (``j = 1..k``).  For each ``j`` the query receives the label of the class
    whose local centre is nearest to it.

    Parameters
    ----------
    I : array-like, shape (n_train, n_features)
        Training observations, one per row.
    L : array-like, shape (n_train,)
        Training labels (numeric, since they are stored in a float array).
    x : array-like, shape (n_query, n_features) or (n_features,)
        Observations to classify; a single 1-D vector is treated as one row.
    k : int
        Largest neighbourhood size.

    Returns
    -------
    numpy.ndarray, shape (n_query, k)
        ``label[q, j]`` is the prediction for query ``q`` when the ``j + 1``
        nearest samples of each class are averaged.

    Raises
    ------
    ValueError
        If some class has fewer than ``k`` training samples.
    """
    from scipy.spatial.distance import cdist

    I = np.asarray(I)
    L = np.asarray(L)
    x = np.atleast_2d(x)
    n_features = I.shape[1]

    class_labels = np.unique(L)
    n_classes = class_labels.shape[0]
    # Row numbers of every class, computed once instead of once per query.
    class_rows = [np.flatnonzero(L == lab) for lab in class_labels]
    for lab, rows in zip(class_labels, class_rows):
        if rows.shape[0] < k:
            raise ValueError(
                "class %r has only %d samples, fewer than k=%d"
                % (lab, rows.shape[0], k))

    # kth = 0..k-1 puts the k nearest samples, in ascending distance order,
    # in the first k slots of the argpartition result.
    kth = tuple(range(k))
    # Divisors 1..k turn the running sums into running means.
    counts = np.arange(1, k + 1).reshape(k, 1)

    label = np.zeros((x.shape[0], k))
    for rowsx in range(x.shape[0]):
        query = x[rowsx:rowsx + 1]
        tic()
        dists = cdist(I, query, metric='euclidean')
        toc()
        tic()
        # center[c, j] is the mean of the j + 1 nearest samples of class c.
        center = np.zeros((n_classes, k, n_features))
        for c, rows in enumerate(class_rows):
            nearest = np.argpartition(dists[rows], kth, axis=None)[:k]
            center[c] = np.cumsum(I[rows[nearest]], axis=0) / counts
        toc()
        # Distance from the query to every local centre, arranged as
        # (class, neighbourhood size); the closest class wins per column.
        dists2center = cdist(center.reshape(n_classes * k, n_features),
                             query,
                             metric='euclidean').reshape(n_classes, k)
        label[rowsx] = class_labels[np.argmin(dists2center, axis=0)]
    return label
Example #2
0
def main():
    """Plot local k-means error rates (all features / 50 PCA components) against kNN.

    Loads the cached SVD of the training images, computing and caching it
    when the cache file is missing, then loads previously pickled predictions
    and plots the test error rate of each classifier for k = 1..10.
    """
    def load_pickle(path):
        # Context manager so the handle is closed as soon as the data is read.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def dump_pickle(obj, path):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

    digits = mnist()  # Creates a class with our mnist images and labels
    try:
        x = load_pickle('Training SVD Data')  # If we already have the file just load it
    except FileNotFoundError:
        # Opening a missing file for reading raises, so the cache is built here.
        print("im here")
        x = center_matrix_SVD(digits.train_Images)  # Creates a class with our svd and associated info
        dump_pickle(x, 'Training SVD Data')
    if 0:  # Set to 1 to recompute the 50-component local k-means labels
        test_Images_Center = np.subtract(
            digits.test_Images,
            np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        labels = local_kmeans_class(
            x.PCA[:, :50], digits.train_Labels,
            test_Images_Center @ np.transpose(x.V[:50, :]), 10)
        toc()
        dump_pickle(labels, 'Loc_kmeans_50_lab')
    loc_full = load_pickle('Loc_kmeans_Full_lab')
    loc_50 = load_pickle('Loc_kmeans_50_lab')
    labels_Full = load_pickle('KNN_Full')
    # The local k-means results are stored transposed relative to the kNN
    # results, so they are flipped before scoring.
    errors_full, ind_full = class_error_rate(np.transpose(loc_full), digits.test_labels)
    errors_50, ind_50 = class_error_rate(np.transpose(loc_50), digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    ks = np.arange(10) + 1
    plt.figure()
    plt.plot(ks, errors_full, color='Green', marker='o', markersize=10, label='Full')
    plt.plot(ks, errors_50, color='Yellow', marker='o', markersize=10, label='82.5%')  # 50-component run
    plt.plot(ks, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.grid(1)  # Turns the grid on
    plt.title('Plot of local KNN Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
Example #3
0
def main():
    """Plot kNN error rates on FDA features against kNN on full and 50-component data.

    Loads the cached SVD of the training images, computing and caching it
    when the cache file is missing, then loads previously pickled predictions,
    plots the test error rate of each classifier for k = 1..10 and prints a
    confusion matrix for row 5 of each prediction array.
    """
    def load_pickle(path):
        # Context manager so the handle is closed as soon as the data is read.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def dump_pickle(obj, path):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

    digits = mnist()  # Creates a class with our mnist images and labels
    try:
        x = load_pickle('Training SVD Data')  # If we already have the file just load it
    except FileNotFoundError:
        # Opening a missing file for reading raises, so the cache is built here.
        print("im here")  # Just wanted to check if it was going in here
        x = center_matrix_SVD(
            digits.train_Images
        )  # Creates a class with our svd and associated info
        dump_pickle(x, 'Training SVD Data')
    if 0:  # if this is zero skip
        test_Images_Center = np.subtract(
            digits.test_Images,
            np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(
            x.PCA[:, :154], digits.train_Labels)  # It will fit based on x.PCA
        new_test = myLDA.transform(test_Images_Center @ np.transpose(
            x.V[:154, :]))  # get my transformed test dataset
        # NOTE(review): this expects KNN to return (labels, nearest); confirm
        # the KNN in use returns two values before enabling this branch.
        Knn_labels, nearest = KNN(new_train, digits.train_Labels, new_test,
                                  10)  # Run kNN on the new data
        toc()
        dump_pickle(Knn_labels, 'FDAKNN_Lables')
        dump_pickle(nearest, 'FDAKNN_neastest')
    fda = load_pickle('FDAKNN_Lables')
    labels_Full = load_pickle('KNN_Full')
    labels_50 = load_pickle('KNN_50')
    errors_fda, ind_fda = class_error_rate(fda, digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    errors_50, ind_50 = class_error_rate(labels_50, digits.test_labels)
    ks = np.arange(10) + 1
    plt.figure()
    plt.plot(ks,
             errors_fda,
             color='Green',
             marker='o',
             markersize=10,
             label='fda')
    plt.plot(ks,
             errors_near,
             color='Blue',
             marker='o',
             markersize=10,
             label='kNN')
    plt.plot(ks,
             errors_50,
             color='Yellow',
             marker='o',
             markersize=10,
             label='kNN 50')
    plt.grid(1)  # Turns the grid on
    plt.title('Plot of Knn with FDA Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
    # Row 5 of each prediction array (presumably the k = 6 run - confirm).
    print(confusion_matrix(digits.test_labels, labels_Full[5]))
    print(confusion_matrix(digits.test_labels, fda[5]))
    print(confusion_matrix(digits.test_labels, labels_50[5]))
    """
Example #4
0
def main():
    """Plot local k-means error rates (all features / 50 PCA components) against kNN.

    Loads the cached SVD of the training images, computing and caching it
    when the cache file is missing, then loads previously pickled predictions
    and plots the test error rate of each classifier for k = 1..10.
    """
    def load_pickle(path):
        # Context manager so the handle is closed as soon as the data is read.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def dump_pickle(obj, path):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

    digits = mnist()  # Creates a class with our mnist images and labels
    try:
        x = load_pickle('Training SVD Data')  # If we already have the file just load it
    except FileNotFoundError:
        # Opening a missing file for reading raises, so the cache is built here.
        print("im here")
        x = center_matrix_SVD(
            digits.train_Images
        )  # Creates a class with our svd and associated info
        dump_pickle(x, 'Training SVD Data')
    if 0:  # Set to 1 to recompute the 50-component local k-means labels
        test_Images_Center = np.subtract(
            digits.test_Images,
            np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        labels = local_kmeans_class(
            x.PCA[:, :50], digits.train_Labels,
            test_Images_Center @ np.transpose(x.V[:50, :]), 10)
        toc()
        dump_pickle(labels, 'Loc_kmeans_50_lab')
    loc_full = load_pickle('Loc_kmeans_Full_lab')
    loc_50 = load_pickle('Loc_kmeans_50_lab')
    labels_Full = load_pickle('KNN_Full')
    # The local k-means results are stored transposed relative to the kNN
    # results, so they are flipped before scoring.
    errors_full, ind_full = class_error_rate(np.transpose(loc_full),
                                             digits.test_labels)
    errors_50, ind_50 = class_error_rate(np.transpose(loc_50),
                                         digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    ks = np.arange(10) + 1
    plt.figure()
    plt.plot(ks,
             errors_full,
             color='Green',
             marker='o',
             markersize=10,
             label='Full')
    plt.plot(ks,
             errors_50,
             color='Yellow',
             marker='o',
             markersize=10,
             label='82.5%')  # 50-component run
    plt.plot(ks,
             errors_near,
             color='Blue',
             marker='o',
             markersize=10,
             label='kNN')
    plt.grid(1)  # Turns the grid on
    plt.title('Plot of local KNN Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
Example #5
0
def KNN(I, L, x, k, weights=1):
    """Classify queries with k-nearest-neighbours for every neighbourhood size 1..k.

    Parameters
    ----------
    I : array-like, shape (n_train, n_features)
        Matrix of training observations, one per row.
    L : array-like, shape (n_train,)
        Training labels (numeric, since they are stored in a float array).
    x : array-like, shape (n_query, n_features) or (n_features,)
        Observations to classify; a single 1-D vector is treated as one row.
    k : int
        Largest number of neighbours.  The ``k + 1`` nearest training rows
        are ranked, so the training set must hold more than ``k`` rows.
    weights : int, optional
        ``1`` (default) selects an unweighted majority vote.  Any other value
        selects a distance-weighted vote over the ``i + 2`` nearest rows, in
        which weights fall linearly from 1 for the nearest row to 0 for the
        farthest one.

    Returns
    -------
    numpy.ndarray, shape (k, n_query)
        ``label[i, q]`` is the prediction for query ``q`` at neighbourhood
        index ``i`` (``i + 1`` neighbours for the unweighted vote).
    """
    from scipy import stats
    from scipy.spatial.distance import cdist

    L = np.asarray(L)
    x = np.atleast_2d(x)
    sizex = x.shape[0]

    # Per-class training row numbers, computed once for the weighted vote.
    labs = np.unique(L)
    all_rows = np.arange(0, L.shape[0])
    class_rows = [all_rows[L == lab] for lab in labs]
    # Slots 1..k are put in sorted position, which also leaves the overall
    # minimum in slot 0: the first k + 1 entries are the ranked neighbours.
    kth = tuple(range(1, k + 1))

    label = np.zeros((k, sizex))
    for rowsx in range(0, sizex):
        tic()
        # Distance from every training row to the current query.
        dists = cdist(I, x[rowsx:rowsx + 1], metric='euclidean')
        k_smallest = np.argpartition(dists, kth, axis=None)
        if weights == 1:
            for i in range(0, k):
                winner = stats.mode(L[k_smallest[:i + 1]])[0]
                # stats.mode returns a scalar or a 1-element array depending
                # on the SciPy version; ravel handles both.
                label[i, rowsx] = np.ravel(winner)[0]
        else:
            for i in range(k):
                near = k_smallest[:i + 2]
                d = dists[near][:, 0]
                lo = np.min(d)
                hi = np.max(d)
                if lo == hi:
                    # All ranked neighbours are equidistant: weigh them
                    # equally instead of dividing by zero.
                    weight_function = np.ones_like(d)
                else:
                    weight_function = np.add(
                        np.divide(d, np.subtract(lo, hi)),
                        1 - lo / np.subtract(lo, hi))
                lab_weighted = np.zeros(labs.shape[0])
                for p in range(0, labs.shape[0]):
                    # NOTE(review): inboth presumably flags which of the
                    # ranked neighbours belong to class p - confirm.
                    indices = inboth(class_rows[p], near)
                    lab_weighted[p] = np.sum(
                        np.multiply(weight_function, indices))
                label[i, rowsx] = labs[np.argmax(lab_weighted)]
        toc()
        print(rowsx)
    return label
Example #6
0
def local_kmeans_class(I, L, x, k):
    """Classify queries with the local k-means rule for neighbourhood sizes 1..k.

    For every query row and every class, the ``j`` training samples of that
    class closest to the query are averaged into a local centre
    (``j = 1..k``).  For each ``j`` the query receives the label of the class
    whose local centre is nearest to it.

    Parameters
    ----------
    I : array-like, shape (n_train, n_features)
        Training observations, one per row.
    L : array-like, shape (n_train,)
        Training labels (numeric, since they are stored in a float array).
    x : array-like, shape (n_query, n_features) or (n_features,)
        Observations to classify; a single 1-D vector is treated as one row.
    k : int
        Largest neighbourhood size.

    Returns
    -------
    numpy.ndarray, shape (n_query, k)
        ``label[q, j]`` is the prediction for query ``q`` when the ``j + 1``
        nearest samples of each class are averaged.

    Raises
    ------
    ValueError
        If some class has fewer than ``k`` training samples.
    """
    from scipy.spatial.distance import cdist

    I = np.asarray(I)
    L = np.asarray(L)
    x = np.atleast_2d(x)
    n_features = I.shape[1]

    class_labels = np.unique(L)
    n_classes = class_labels.shape[0]
    # Row numbers of every class, computed once instead of once per query.
    class_rows = [np.flatnonzero(L == lab) for lab in class_labels]
    for lab, rows in zip(class_labels, class_rows):
        if rows.shape[0] < k:
            raise ValueError(
                "class %r has only %d samples, fewer than k=%d"
                % (lab, rows.shape[0], k))

    # kth = 0..k-1 puts the k nearest samples, in ascending distance order,
    # in the first k slots of the argpartition result.
    kth = tuple(range(k))
    # Divisors 1..k turn the running sums into running means.
    counts = np.arange(1, k + 1).reshape(k, 1)

    label = np.zeros((x.shape[0], k))
    for rowsx in range(x.shape[0]):
        query = x[rowsx:rowsx + 1]
        tic()
        dists = cdist(I, query, metric='euclidean')
        toc()
        tic()
        # center[c, j] is the mean of the j + 1 nearest samples of class c.
        center = np.zeros((n_classes, k, n_features))
        for c, rows in enumerate(class_rows):
            nearest = np.argpartition(dists[rows], kth, axis=None)[:k]
            center[c] = np.cumsum(I[rows[nearest]], axis=0) / counts
        toc()
        # Distance from the query to every local centre, arranged as
        # (class, neighbourhood size); the closest class wins per column.
        dists2center = cdist(center.reshape(n_classes * k, n_features),
                             query,
                             metric='euclidean').reshape(n_classes, k)
        label[rowsx] = class_labels[np.argmin(dists2center, axis=0)]
    return label
Example #7
0
def main():
    """Run local k-means on FDA features and plot its error rate against other runs.

    Loads the cached SVD of the training images, computing and caching it
    when the cache file is missing, projects the data with LDA fitted on the
    first 154 PCA components, classifies the test set with local k-means and
    pickles the labels.  It then plots the test error rates for k = 1..10 next
    to the stored kNN and full local k-means results and finally calls
    ``project_back``.
    """
    def load_pickle(path):
        # Context manager so the handle is closed as soon as the data is read.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def dump_pickle(obj, path):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

    digits = mnist()  # Creates a class with our mnist images and labels
    try:
        x = load_pickle('Training SVD Data')  # If we already have the file just load it
    except FileNotFoundError:
        # Opening a missing file for reading raises, so the cache is built here.
        x = center_matrix_SVD(digits.train_Images)  # Creates a class with our svd and associated info
        dump_pickle(x, 'Training SVD Data')
    if 1:  # if this is zero skip
        test_Images_Center = np.subtract(
            digits.test_Images,
            np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(x.PCA[:, :154], digits.train_Labels)  # It will fit based on x.PCA
        new_test = myLDA.transform(
            test_Images_Center @ np.transpose(x.V[:154, :]))  # get my transformed test dataset
        Knn_labels = local_kmeans_class(new_train, digits.train_Labels, new_test, 10)  # Run local k-means on the new data
        toc()
        dump_pickle(Knn_labels, 'Loc_kmeans_fda_lab')

    fda = load_pickle('Loc_kmeans_fda_lab')
    labels_Full = load_pickle('KNN_Full')
    loc_full = load_pickle('Loc_kmeans_Full_lab')
    labels_50 = load_pickle('KNN_50')
    # Local k-means results are transposed before scoring; kNN results are not.
    errors_fda, ind_fda = class_error_rate(np.transpose(fda), digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    errors_full, ind_full = class_error_rate(np.transpose(loc_full), digits.test_labels)
    errors_50, ind_50 = class_error_rate(labels_50, digits.test_labels)
    print(errors_full)
    ks = np.arange(10) + 1
    plt.figure()
    plt.plot(ks, errors_fda, color='Green', marker='o', markersize=10, label='fda Kmeans')
    plt.plot(ks, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.plot(ks, errors_full, color='Yellow', marker='o', markersize=10, label='Full Kmeans')
    plt.plot(ks, errors_50, color='Red', marker='o', markersize=10, label='kNN 50')
    axes = plt.gca()
    axes.set_ylim([0.015, 0.12])
    plt.grid(1)  # Turns the grid on
    plt.title('Plot of Local Kmeans with FDA Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
    project_back(x, digits)
Example #8
0
def main():
    """Plot kNN error rates on FDA features against kNN on full and 50-component data.

    Loads the cached SVD of the training images, computing and caching it
    when the cache file is missing, then loads previously pickled predictions,
    plots the test error rate of each classifier for k = 1..10 and prints a
    confusion matrix for row 5 of each prediction array.
    """
    def load_pickle(path):
        # Context manager so the handle is closed as soon as the data is read.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def dump_pickle(obj, path):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

    digits = mnist()  # Creates a class with our mnist images and labels
    try:
        x = load_pickle('Training SVD Data')  # If we already have the file just load it
    except FileNotFoundError:
        # Opening a missing file for reading raises, so the cache is built here.
        print("im here")   # Just wanted to check if it was going in here
        x = center_matrix_SVD(digits.train_Images)  # Creates a class with our svd and associated info
        dump_pickle(x, 'Training SVD Data')
    if 0:  # if this is zero skip
        test_Images_Center = np.subtract(
            digits.test_Images,
            np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(x.PCA[:, :154], digits.train_Labels)  # It will fit based on x.PCA
        new_test = myLDA.transform(
            test_Images_Center @ np.transpose(x.V[:154, :]))  # get my transformed test dataset
        # NOTE(review): this expects KNN to return (labels, nearest); confirm
        # the KNN in use returns two values before enabling this branch.
        Knn_labels, nearest = KNN(new_train, digits.train_Labels, new_test, 10)  # Run kNN on the new data
        toc()
        dump_pickle(Knn_labels, 'FDAKNN_Lables')
        dump_pickle(nearest, 'FDAKNN_neastest')
    fda = load_pickle('FDAKNN_Lables')
    labels_Full = load_pickle('KNN_Full')
    labels_50 = load_pickle('KNN_50')
    errors_fda, ind_fda = class_error_rate(fda, digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    errors_50, ind_50 = class_error_rate(labels_50, digits.test_labels)
    ks = np.arange(10) + 1
    plt.figure()
    plt.plot(ks, errors_fda, color='Green', marker='o', markersize=10, label='fda')
    plt.plot(ks, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.plot(ks, errors_50, color='Yellow', marker='o', markersize=10, label='kNN 50')
    plt.grid(1)  # Turns the grid on
    plt.title('Plot of Knn with FDA Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
    # Row 5 of each prediction array (presumably the k = 6 run - confirm).
    print(confusion_matrix(digits.test_labels, labels_Full[5]))
    print(confusion_matrix(digits.test_labels, fda[5]))
    print(confusion_matrix(digits.test_labels, labels_50[5]))
    """
Example #9
0
def KNN(I, L, x, k,weights = 1):
    """Classify queries with k-nearest-neighbours for every neighbourhood size 1..k.

    Parameters
    ----------
    I : array-like, shape (n_train, n_features)
        Matrix of training observations, one per row.
    L : array-like, shape (n_train,)
        Training labels (numeric, since they are stored in a float array).
    x : array-like, shape (n_query, n_features) or (n_features,)
        Observations to classify; a single 1-D vector is treated as one row.
    k : int
        Largest number of neighbours.  The ``k + 1`` nearest training rows
        are ranked, so the training set must hold more than ``k`` rows.
    weights : int, optional
        ``1`` (default) selects an unweighted majority vote.  Any other value
        selects a distance-weighted vote over the ``i + 2`` nearest rows, in
        which weights fall linearly from 1 for the nearest row to 0 for the
        farthest one.

    Returns
    -------
    numpy.ndarray, shape (k, n_query)
        ``label[i, q]`` is the prediction for query ``q`` at neighbourhood
        index ``i`` (``i + 1`` neighbours for the unweighted vote).
    """
    from scipy import stats
    from scipy.spatial.distance import cdist

    L = np.asarray(L)
    x = np.atleast_2d(x)
    sizex = x.shape[0]

    # Per-class training row numbers, computed once for the weighted vote.
    labs = np.unique(L)
    all_rows = np.arange(0, L.shape[0])
    class_rows = [all_rows[L == lab] for lab in labs]
    # Slots 1..k are put in sorted position, which also leaves the overall
    # minimum in slot 0: the first k + 1 entries are the ranked neighbours.
    kth = tuple(range(1, k + 1))

    label = np.zeros((k, sizex))
    for rowsx in range(0, sizex):
        tic()
        # Distance from every training row to the current query.
        dists = cdist(I, x[rowsx:rowsx + 1], metric='euclidean')
        k_smallest = np.argpartition(dists, kth, axis=None)
        if weights == 1:
            for i in range(0, k):
                winner = stats.mode(L[k_smallest[:i + 1]])[0]
                # stats.mode returns a scalar or a 1-element array depending
                # on the SciPy version; ravel handles both.
                label[i, rowsx] = np.ravel(winner)[0]
        else:
            for i in range(k):
                near = k_smallest[:i + 2]
                d = dists[near][:, 0]
                lo = np.min(d)
                hi = np.max(d)
                if lo == hi:
                    # All ranked neighbours are equidistant: weigh them
                    # equally instead of dividing by zero.
                    weight_function = np.ones_like(d)
                else:
                    weight_function = np.add(
                        np.divide(d, np.subtract(lo, hi)),
                        1 - lo / np.subtract(lo, hi))
                lab_weighted = np.zeros(labs.shape[0])
                for p in range(0, labs.shape[0]):
                    # NOTE(review): inboth presumably flags which of the
                    # ranked neighbours belong to class p - confirm.
                    indices = inboth(class_rows[p], near)
                    lab_weighted[p] = np.sum(
                        np.multiply(weight_function, indices))
                label[i, rowsx] = labs[np.argmax(lab_weighted)]
        toc()
        print(rowsx)
    return label
Example #10
0
"""
label = pickle.load(open('kNNWeight.p', 'rb'))
from sklearn.metrics import confusion_matrix
import pandas
print(test_labels.shape)
x = confusion_matrix(test_labels,label[2])
error = np.zeros(10)
for n in range(10):
    error[n] = 1-x[n,n]/(np.sum(x,axis=0)[n])
print(error)
import matplotlib.pyplot as plt
plt.plot(range(10),error)
plt.show()
print(pandas.DataFrame(x,range(10),range(10)))
"""
"""
tic()
m = mfoldX(train_Images[:6000], train_Labels[:6000], 6, 10)
print(m)
toc()
pickle.dump(m, open('kisfive.p', 'wb'))
"""
"""
import matplotlib.pyplot as plt
m = pickle.load(open('kisfive.p', 'rb'))
plt.plot(range(1,11),m)
plt.show()
"""
"""
from sklearn import neighbors, datasets
Example #11
0
"""
label = pickle.load(open('kNNWeight.p', 'rb'))
from sklearn.metrics import confusion_matrix
import pandas
print(test_labels.shape)
x = confusion_matrix(test_labels,label[2])
error = np.zeros(10)
for n in range(10):
    error[n] = 1-x[n,n]/(np.sum(x,axis=0)[n])
print(error)
import matplotlib.pyplot as plt
plt.plot(range(10),error)
plt.show()
print(pandas.DataFrame(x,range(10),range(10)))
"""
"""
tic()
m = mfoldX(train_Images[:6000], train_Labels[:6000], 6, 10)
print(m)
toc()
pickle.dump(m, open('kisfive.p', 'wb'))
"""
"""
import matplotlib.pyplot as plt
m = pickle.load(open('kisfive.p', 'rb'))
plt.plot(range(1,11),m)
plt.show()
"""
"""
from sklearn import neighbors, datasets