# Build the label row vectors (shape 1 x N): train_size_boys / test_size_boys
# zeros followed by train_size_girls / test_size_girls ones, i.e. label 0 for
# the "boys" samples and label 1 for the "girls" samples.
trainy = np.concatenate(
    (np.zeros((1, train_size_boys)), np.ones((1, train_size_girls))), axis=1)
testy = np.concatenate(
    (np.zeros((1, test_size_boys)), np.ones((1, test_size_girls))), axis=1)

#plt.figure(1)
#show_image.show_image_function(trainx.T, 65, 65)

trainno = np.shape(trainy)[1]  # number of training labels
dimno = np.shape(trainx)[0]    # length of trainx's first axis (presumably the feature count -- confirm)
testno = np.shape(testy)[1]    # number of test labels

# Show the test images in interactive mode so plt.show() does not block, then
# wait for the user before closing every figure.
# NOTE(review): 65, 65 are presumably the image height/width expected by
# show_image_function -- confirm against the show_image module.
plt.figure(1)
plt.ion()
show_image.show_image_function(testx.T, 65, 65)
plt.axis('off')
plt.show()
plt.ioff()

# raw_input exists only on Python 2; on Python 3 the equivalent is input().
# Resolve the reader once so the script runs unchanged on both interpreters.
try:
    _read_line = raw_input
except NameError:
    _read_line = input
_read_line('press a key to continue ....\n')
plt.close("all")

# Parenthesised form prints the same text on Python 2 and Python 3.
print('please wait for Naive Bayes!')

##
# Naive Bayes
# estimate class prior distribution;
# py[i, 0] is the fraction of training labels equal to i (i = 0, 1).
py = np.zeros((2, 1))
for i in range(0, 2):
    py[i, 0] = np.sum(trainy == i) * 1.0 / trainno
# , data[:, indices_test, 3], data[:, indices_test, 4] \ # , data[:, indices_test, 5], data[:, indices_test, 6] \ # , data[:, indices_test, 7], data[:, indices_test, 8]\ ), axis = 1) testy = np.concatenate((np.zeros((1, test_size)), np.ones((1, test_size)) \ , 2 * np.ones((1, test_size)) \ # , 3 * np.ones((1, test_size)) \ # , 4 * np.ones((1, test_size)), 5 * np.ones((1, test_size)) \ # , 6 * np.ones((1, test_size)), 7 * np.ones((1, test_size)) \ # , 8 * np.ones((1, test_size)), 9 * np.ones((1, test_size)) \ ), axis = 1) testno = np.shape(testy)[1] plt.figure() plt.ion() show_image.show_image_function(testx.T, 16, 16) plt.axis('off') plt.show() plt.ioff() raw_input('press a key to continue ....\n') plt.close("all") print 'please wait for Naive Bayes!' ## # Naive Bayes # estimate class prior distribution; py = np.zeros((num, 1)) for i in range(0,num):
X2 = data[:, :, 2].T Y = np.concatenate((np.ones((1100, 1)), -np.ones((1100, 1))), axis=0) H = 16 W = 16 # Create a training set Xtrain = np.concatenate((X[0:int(1100 * 0.8), :], X[1100:1980, :]), axis=0) Ytrain = np.concatenate((Y[0:int(1100 * 0.8), :], Y[1100:1980, :]), axis=0) # test set Xtest = np.concatenate((X[int(1100 * 0.8):1100, :], X[1980:2200, :]), axis=0) Ytest = np.concatenate((Y[int(1100 * 0.8):1100, :], Y[1980:2200, :]), axis=0) plt.figure() show_image.show_image_function(Xtest, H, W) plt.axis('off') plt.title('Test set') train_size = Ytrain.shape[0] test_size = Ytest.shape[0] Xtrain = Xtrain.astype(float) Xtest = Xtest.astype(float) print '--running svm\n' beta, beta0 = svm.svm_function(Xtrain, Ytrain, 1) Y_hat_train = np.sign(Xtrain.dot(beta.T) + beta0) precision1 = np.sum((Ytrain == Y_hat_train)
# (2) Both of them have 1100 data points.

# read data
load1 = sio.loadmat('usps_all.mat')
data = load1['data'].astype(float)

# class 1: digit 1
# class 2: digit 2
# Stack the two classes row-wise: every row of X is one sample, class 1 rows
# first, class 2 rows after them.
X = np.vstack((data[:, :, 0].T, data[:, :, 1].T))
X_row, X_col = X.shape

# Image height and width handed to the display helper.
H = W = 16

plt.figure()
show_image.show_image_function(X, H, W)
plt.axis('off')
plt.title('Whole Dataset of Digit 1 and Digit 2')

## Separate the dataset into training and testing
# Both class slices share the second axis of data, so their sample counts
# are the same number.
nclass1 = nclass2 = data.shape[1]  # 1100

# Column vector of labels: 1 for each class-1 row of X, 2 for each class-2 row.
Y = np.vstack((np.ones((nclass1, 1)), np.full((nclass2, 1), 2.0)))

# Use p percent data as training data
p = 0.8

# Training counts are the rounded p-fraction of each class; the remainder of
# each class is its test count.
nclass1_train, nclass2_train = (
    np.round(count * p).astype(int) for count in (nclass1, nclass2))
nclass1_test = nclass1 - nclass1_train
nclass2_test = nclass2 - nclass2_train
# Load the USPS digit dataset.
matFile = sio.loadmat('usps_all.mat')
data = matFile['data']
pixelno = data.shape[0]
digitno = data.shape[1]
classno = data.shape[2]

# Display digit 1 (data[:, :, 0]) and digit 0 (data[:, :, 9]) from the dataset.
H = 16
W = 16
plt.figure(1)
# Columns of the two class slices are joined side by side, then transposed so
# that each row handed to the display helper is one image.
digits_01 = np.concatenate((data[:, :, 0], data[:, :, 9]), axis=1).T
show_image.show_image_function(digits_01, H, W)
plt.title('digit 1 and 0')

#plt.figure(1)
#show_image.show_image_function(np.array(data[:,:,9]).T, H, W)
#plt.title('digit 2')

# Extract the digits 1 and 0 and convert them to double precision.
# np.concatenate already returns a fresh array, so no extra np.array copies
# of the slices are needed.
x0 = np.concatenate((data[:, :, 0], data[:, :, 9]), axis=1)
# The builtin float is used because the np.float alias was deprecated in
# NumPy 1.20 and removed in 1.24; both mean float64 here.
x = np.array(x0, dtype=float)
# Labels as a 1 x (2 * digitno) row: 1 for the first digitno columns of x
# (digit 1), 2 for the remaining digitno columns (digit 0).
y = np.concatenate((np.ones((1, digitno)), 2 * np.ones((1, digitno))), axis=1)

# number of data points to work with;
m = x.shape[1]

# Normalize the data
# Anew holds one sample per row (transpose of x).
Anew = x.T
matFile = sio.loadmat('usps_all.mat')
# data is 256 x 1100 x 10, consisting of
# 1100 16x16 images of 10 digits.
data = matFile['data']
pixelno = data.shape[0]
digitno = data.shape[1]
classno = data.shape[2]

H = 16
W = 16
plt.figure(0)
# Display all images of digits 1 and 0.
digits_01 = np.concatenate((data[:, :, 0], data[:, :, 9]), axis=1).T
show_image.show_image_function(digits_01, H, W)
plt.title('digit 1 and 0')

# Create data consisting only 1 and 0.
# x is the images, y is the labels.
# The two class slices are concatenated along axis 1 so that the first
# digitno columns are digit 1 and the last digitno columns are digit 0,
# matching both y below and digits_01 above. Reshaping
# data[:, :, [0, 9]] to (pixelno, 2 * digitno) would NOT give this layout:
# NumPy reshapes in C (row-major) order, which alternates the two classes
# column by column and misaligns the images with their labels.
x0 = np.concatenate((data[:, :, 0], data[:, :, 9]), axis=1)
# The builtin float is used because the np.float alias was deprecated in
# NumPy 1.20 and removed in 1.24; both mean float64 here.
x = x0.astype(float)
# Labels as a 1 x (2 * digitno) row: 1 for the first digitno columns of x,
# 2 for the remaining digitno columns.
y = np.concatenate((np.ones((1, digitno)), 2 * np.ones((1, digitno))), axis=1)

# number of data points to work with;
m = x.shape[1]

###############################################################################
## k-means algorithm;
# Greedy algorithm trying to minimize the objective function;
# A highly vectorized version of kmeans.