def verify():
    """Recognise the check-code image and return it as a string.

    Calls ``prepare_data`` with the check-code URL, loads the trained
    parameters from ``theta.dat`` and the samples from ``cache/X.dat``
    (scaled by 1/255), classifies them with ``predictOneVsAll`` and joins
    the per-sample characters into one string.

    Label-to-character mapping (ASCII code points):
      * 0..9   -> label + 48  ('0'..'9')
      * 10..23 -> label + 87  ('a'..'n')
      * 24..   -> label + 88  ('p' onwards; 'o' is never produced)
    """
    # presumably this fetches the image and writes cache/X.dat -- TODO confirm
    prepare_data('http://jwxt.jit.edu.cn/CheckCode.aspx')

    theta = matrix(loadtxt('theta.dat'))
    samples = matrix(loadtxt('cache/X.dat')) / 255.0
    _, labels = predictOneVsAll(theta, samples)

    def code_point(label):
        # Same three-way split as the table in the docstring.
        if label <= 9:
            return label + 48
        if label <= 23:
            return label + 87
        return label + 88

    return ''.join([chr(code_point(label)) for label in labels])
def output(part_id):
    """Return the formatted result string for one graded part.

    Builds a fixed set of test inputs, imports the source module that
    corresponds to ``part_id`` (looked up in ``srcs``), then evaluates the
    matching routine and formats the result with ``sprintf``.

    part_id: 1-based integer part number (also used to index ``srcs``).
    Returns the formatted string for parts 1-4, ``None`` for any other id.
    """
    # Fixed test cases
    ramp = np.linspace(1, 20, 20)
    X = np.column_stack((
        np.ones(20),
        np.exp(1) * np.sin(ramp),
        np.exp(0.5) * np.cos(ramp),
    ))
    y = np.sin(X[:, 0] + X[:, 1]) > 0

    points = [[-1, -1], [-1, -2], [-2, -1], [-2, -2],
              [1, 1], [1, 2], [2, 1], [2, 2],
              [-1, 1], [-1, 2], [-2, 1], [-2, 2],
              [1, -1], [1, -2], [-2, -1], [-2, -2]]
    Xm = np.array(points).reshape((16, 2))
    ym = np.array([1, 1, 1, 1, 2, 2, 2, 2,
                   3, 3, 3, 3, 4, 4, 4, 4]).reshape(16, 1)

    t1 = np.sin(np.array(range(1, 24, 2)).reshape(3, 4).T)
    t2 = np.cos(np.array(range(1, 40, 2)).reshape(5, 4).T)

    # Import the module for this part and look up the function named after
    # it.  The looked-up object is not used below, but the import and the
    # attribute access still run (and can raise) exactly as before.
    module_name = srcs[part_id - 1].rsplit('.', 1)[0]
    module = __import__(module_name, fromlist=[module_name], level=1)
    func = getattr(module, module_name)

    if part_id == 1:
        J = lrCostFunction(np.array([0.25, 0.5, -0.5]), X, y, 0.1)
        grad = gradientFunctionReg(np.array([0.25, 0.5, -0.5]), X, y, 0.1)
        combined = np.hstack((J, grad))
        return sprintf('%0.5f ', combined.tolist())
    if part_id == 2:
        return sprintf('%0.5f ', oneVsAll(Xm, ym, 4, 0.1))
    if part_id == 3:
        return sprintf('%0.5f ', predictOneVsAll(t1, Xm))
    if part_id == 4:
        return sprintf('%0.5f ', predict(t1, t2, Xm))
def test():
    """Evaluate the stored classifier on the test set and print accuracy.

    Loads ``theta.dat``, ``X_test.dat`` (scaled by 1/255) and ``y_test.dat``
    from the working directory, classifies the test samples with
    ``predictOneVsAll`` and prints the per-sample accuracy together with a
    lower/upper estimate for getting four samples right at once.

    Output is written with single-argument ``print(...)`` calls, which print
    the same text on Python 2 and are also valid syntax on Python 3 (the
    previous ``print`` statements were a SyntaxError there).
    """
    print('... Testing')
    all_theta = matrix(loadtxt('theta.dat'))
    X_test = matrix(loadtxt('X_test.dat')) / 255.0
    # Transposed so the labels line up with ``pred`` in the comparison below
    # (assumes pred is a column -- TODO confirm against predictOneVsAll).
    y_test = matrix(loadtxt('y_test.dat')).transpose()
    acc, pred = predictOneVsAll(all_theta, X_test)

    # Fraction of individual samples classified correctly.
    single_acc = sum(pred == y_test) / (len(y_test) * 1.0)
    # Four independent samples all correct: p ** 4.
    max_acc = pow(single_acc, 4)
    # Worst case for four samples: 1 - 4 * (1 - p) == 4 * p - 3.
    min_acc = single_acc*4 - 3

    print('Theoretical accuracy:')
    print('\tSingle accuracy: %2.2f%%' % (single_acc*100))
    print('\tTotal accuracy: %2.2f%% ~ %2.2f%%' % (min_acc*100, max_acc*100))
def output(partId):
    """Return the formatted result string for one graded part.

    Builds a fixed set of test inputs and evaluates the routine selected by
    ``partId``, formatting the result with ``sprintf``.

    partId: part number as a string, '1' through '4'.
    Returns the formatted string, or ``None`` for any other value.
    """
    # Fixed test cases
    steps = arange(1, 21, 1)
    X = column_stack((ones(20), exp(1) * sin(steps), exp(0.5) * cos(steps)))
    y = (sin(X[:, 0] + X[:, 1]) > 0).astype(int)

    Xm = array([
        [-1, -1], [-1, -2], [-2, -1], [-2, -2],
        [1, 1], [1, 2], [2, 1], [2, 2],
        [-1, 1], [-1, 2], [-2, 1], [-2, 2],
        [1, -1], [1, -2], [-2, -1], [-2, -2],
    ])
    ym = array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4])

    # Column-major reshape of the odd numbers 1..23 and 1..39.
    t1 = sin(arange(1, 25, 2).reshape(4, 3, order='F'))
    t2 = cos(arange(1, 41, 2).reshape(4, 5, order='F'))

    if partId == '1':
        J, grad = lrCostFunction(array([0.25, 0.5, -0.5]), X, y, 0.1)
        cost_text = sprintf('%0.5f ', J)
        return cost_text + sprintf('%0.5f ', grad)
    if partId == '2':
        return sprintf('%0.5f ', oneVsAll(Xm, ym, 4, 0.1))
    if partId == '3':
        return sprintf('%0.5f ', predictOneVsAll(t1, Xm))
    if partId == '4':
        return sprintf('%0.5f ', predict(t1, t2, Xm))
# Randomly select 100 data points to display rand_indices = np.random.permutation(range(m)) sel = X[rand_indices[0:100], :] displayData(sel) raw_input("Program paused. Press Enter to continue...") ## ============ Part 2: Vectorize Logistic Regression ============ # In this part of the exercise, you will reuse your logistic regression # code from the last exercise. You task here is to make sure that your # regularized logistic regression implementation is vectorized. After # that, you will implement one-vs-all classification for the handwritten # digit dataset. # print "Training One-vs-All Logistic Regression..." Lambda = 0.1 all_theta = oneVsAll(X, y, num_labels, Lambda) raw_input("Program paused. Press Enter to continue...") ## ================ Part 3: Predict for One-Vs-All ================ # After ... pred = predictOneVsAll(all_theta, X) accuracy = np.mean(np.double(pred == np.squeeze(y))) * 100 print "\nTraining Set Accuracy: %f\n" % accuracy
# Script fragment: relies on names defined earlier, outside this view
# (X, y, m, num_labels, rand_indices, oneVsAll, predictOneVsAll, pause).
# The bare triple-quoted strings are section markers; they are evaluated
# and discarded.
"""## Part 2b: One-vs-All Training ============"""
print('\nTraining One-vs-All Logistic Regression...\n')

# Regularization strength passed to oneVsAll.
reg_lambda = 0.1
all_theta = oneVsAll(X, y, num_labels, reg_lambda)

print('Program paused. Press enter to continue.\n')
pause()

"""## Part 3: Predict for One-Vs-All """
# Compute accuracy on our training set
p = predictOneVsAll(all_theta, X)

# Flatten y to a 1-D array of length m before comparing it with p.
y = y.reshape((m))

# rand_indices here is the value assigned earlier in the script; it is only
# re-drawn further down.
print(y[rand_indices])

# Percentage of predictions that equal the labels.
print("training Set Accuracy:: ", np.multiply(np.mean((p == y).astype(int)), 100), '%')

#  To give you an idea of the network's output, you can also run
#  through the examples one at a time to see what it is predicting.

#  Draw 600 example indices from range(m).
#  NOTE(review): np.random.choice samples with replacement by default, so
#  this is not a permutation and indices may repeat.
rand_indices = np.random.choice(m, 600)

for i in range(600):
    # Display
    print('\nDisplaying Example Image\n')
(3, 5)).T), axis=1)
# ^ tail of a statement that begins before this view; left untouched.
# Names used below but defined outside this view: theta_t, X_t, X, y,
# num_labels, lrCostFunction, gradientFunctionReg, oneVsAll, predictOneVsAll.

# Small hand-made test case for the regularized cost and gradient.
y_t = np.array([1, 0, 1, 0, 1])
lambda_t = 3
J = lrCostFunction(theta_t, X_t, y_t, lambda_t)
grad = gradientFunctionReg(theta_t, X_t, y_t, lambda_t)

# Print the computed values next to the reference values for comparison.
print('Cost: %f' % J)
print('Expected cost: 2.534819\n')
print('Gradients:')
print(grad)
print('Expected gradients:')
print(' 0.146561\n -0.548558\n 0.724722\n 1.398003\n')

print('Program paused. Press enter to continue.\n')

# ============ Part 2b: One-vs-All Training ============
print('Training One-vs-All Logistic Regression...')

# Regularization strength passed to oneVsAll.
Lambda = 0.1
all_theta = oneVsAll(X, y, num_labels, Lambda)

# Block until the user presses Enter.
input("Program paused. Press Enter to continue...")

# ================ Part 3: Predict for One-Vs-All ================
#  After ...

pred = predictOneVsAll(all_theta, X)
print(pred)

# Percentage of predictions equal to the flattened labels.
accuracy = np.mean(np.double(pred == y.ravel())) * 100
print('\nTraining Set Accuracy: %f\n' % accuracy)
"""Handwriting recognition using one vs all logistic regression."""

__author__="Jesse Lord"
__date__="January 9, 2015"

from readData import readData
from oneVsAll import oneVsAll
from predictOneVsAll import predictOneVsAll
import pickle

# Set to a true value to retrain and rewrite thetas.pickle; otherwise the
# previously saved parameters are loaded from that file.
firstrun = 0

if __name__=="__main__":

    (X,y) = readData()

    num_labels = 10 # number of labels for one vs all
    lam = 0.1 # regularization parameter

    if firstrun:
        all_thetas = oneVsAll(X,y,num_labels,lam)
        # Pickle data is bytes: the file must be opened in binary mode
        # (text mode fails on Python 3 and is not portable on Python 2).
        with open('thetas.pickle','wb') as f:
            pickle.dump([all_thetas],f)
    else:
        # NOTE: unpickling can execute arbitrary code; only load a
        # thetas.pickle that this script produced.
        with open('thetas.pickle','rb') as f:
            [all_thetas] = pickle.load(f)

    # Multiplied by 100 below, so this is presumably the fraction of
    # correct predictions -- TODO confirm against predictOneVsAll.
    prediction = predictOneVsAll(X,y,all_thetas,num_labels)

    # Single-argument print(...) prints the same text on Python 2 and is
    # valid syntax on Python 3.
    print("One vs All determines the handwriting correctly on the training set "+str(100*prediction)+"% of the time.")
# Нормализация свойств и добавление единичного признака m, n = X.shape X = np.concatenate((np.ones((m, 1)), X), axis = 1) # Задание общего числа классов (меток) num_labels = 10 # Задание начальных параметров модели initial_theta = np.zeros([n + 1, num_labels]) # Задание параметров градиентного спуска iterations = 1500 alpha = 1 # Визуализация процесса сходимости для i-го классифкатора flag = False # Выполнение процедуры обучения параметров модели all_theta = oneVsAll(X, y, num_labels, initial_theta, alpha, iterations, flag) input('Программа остановлена. Нажмите Enter для продолжения ... \n') # == Часть 3. Вычисление доли правильных ответов классификатора == print('Часть 3. Вычисление доли правильных ответов классификатора') # Вычисление доли правильных ответов классификатора p = predictOneVsAll(X, all_theta) acc = np.sum((p == y).astype('float64')) / len(y) * 100 print('Доля правильных ответов обученного классификатора = {:.4f}'.format(acc))
"""Handwriting recognition using one vs all logistic regression."""

__author__ = "Jesse Lord"
__date__ = "January 9, 2015"

from readData import readData
from oneVsAll import oneVsAll
from predictOneVsAll import predictOneVsAll
import pickle

# Set to a true value to retrain and rewrite thetas.pickle; otherwise the
# previously saved parameters are loaded from that file.
firstrun = 0

if __name__ == "__main__":

    (X, y) = readData()

    num_labels = 10  # number of labels for one vs all
    lam = 0.1  # regularization parameter

    if firstrun:
        all_thetas = oneVsAll(X, y, num_labels, lam)
        # Pickle data is bytes: the file must be opened in binary mode
        # (text mode fails on Python 3 and is not portable on Python 2).
        with open('thetas.pickle', 'wb') as f:
            pickle.dump([all_thetas], f)
    else:
        # NOTE: unpickling can execute arbitrary code; only load a
        # thetas.pickle that this script produced.
        with open('thetas.pickle', 'rb') as f:
            [all_thetas] = pickle.load(f)

    # Multiplied by 100 below, so this is presumably the fraction of
    # correct predictions -- TODO confirm against predictOneVsAll.
    prediction = predictOneVsAll(X, y, all_thetas, num_labels)

    # Single-argument print(...) prints the same text on Python 2 and is
    # valid syntax on Python 3.
    print("One vs All determines the handwriting correctly on the training set " + str(
        100 * prediction) + "% of the time.")
def predictOneVsAll(myTheta,myrow):
    """
    Predict the label of a single image (one row of X).

    Evaluates the hypothesis ``h`` once per class, using the matching row
    of ``myTheta``, and returns the label whose hypothesis is largest.
    Labels are ordered 10, 1, 2, ..., 9, so row 0 of ``myTheta`` belongs
    to label 10.
    """
    labels = [10] + list(range(1, 10))

    # One hypothesis value per candidate label, in label order.
    scores = [h(myTheta[k], myrow) for k in range(len(labels))]

    # The label with the highest hypothesis wins.
    best = np.argmax(np.array(scores))
    return labels[best]


# ---- driver script -------------------------------------------------------
# Theta, X, y, h and np come from earlier in the file, outside this view.
import sys
sys.path.append("..")

# Separate implementation living in the parent directory, bound to ``qqin``
# so it does not replace the function defined above.
import predictOneVsAll as qqin

pred = qqin.predictOneVsAll(Theta.T, X[:,1:])
print(pred.shape, '\nTraining Set Accuracy: ', np.mean(y.ravel() == pred.ravel()))
input()

# "You should see that the training set accuracy is about 94.9%"
n_correct, n_total = 0., 0.
incorrect_indices = []
for irow in range(X.shape[0]):
    n_total += 1
    if predictOneVsAll(Theta, X[irow]) == y[irow]:
        n_correct += 1
        continue
    # Remember which rows were misclassified.
    incorrect_indices.append(irow)

print( "Training set accuracy: %0.1f%%" %(100*(n_correct/n_total)))
import scipy.io as sio
import numpy as np

## read data from ex3data1.mat
a=sio.loadmat('ex3data1.mat')
data=a['X']
data=np.array(data)# 5000*400
labels=a['y']
labels=np.array(labels)# 5000*1

#from sklearn.datasets.samples_generator import make_blobs
# display is not related to the methods; implement it later
#data, labels = make_blobs(n_samples=5000, centers=10, random_state=0, cluster_std=0.5)

# m: number of samples, n: number of features per sample
m,n=data.shape

#from featureNormalize import featureNormalize
#data,data_mu,data_sigma=featureNormalize(data)

# test cost_function
#from cost_function import cost_function
#theta=np.zeros([n,1])
#J,cost=cost_function(theta,data,np.array(labels==4,dtype=int))

# Train with 10 classes and regularization 0.1, then cache the parameters
# on disk.
from oneVsall import oneVsall
all_theta=oneVsall(data,labels,10,0.1)
np.save("theta_50.npy",all_theta)
#all_theta=np.load('theta_50.npy')

# Predict on the training data; reshape to (m, 1) so the comparison with
# ``labels`` is element-wise.
from predictOneVsAll import predictOneVsAll
p=np.reshape(predictOneVsAll(all_theta,data),[m,1])
pred=np.array(p==labels,dtype=int)

# Training accuracy as a fraction.  Divide by the actual sample count m
# instead of a hard-coded 5000 so the figure stays correct for other
# dataset sizes.  Single-argument print(...) prints the same text on
# Python 2 and is valid syntax on Python 3.
print(float(np.sum(pred))/float(m))
# Script fragment: relies on names defined earlier, outside this view
# (X, y, m, num_labels, rand_indices, oneVsAll, predictOneVsAll, pause).

# Four reference values; the statements that introduce them are above this
# view.
print(' 0.146561\n -0.548558\n 0.724722\n 1.398003\n')

print('Program paused. Press enter to continue.\n')
pause()

# The bare triple-quoted strings below are section markers; they are
# evaluated and discarded.
"""## Part 2b: One-vs-All Training ============"""
print('\nTraining One-vs-All Logistic Regression...\n')

# Regularization strength passed to oneVsAll.
reg_lambda = 0.1
all_theta = oneVsAll(X, y, num_labels, reg_lambda)

print('Program paused. Press enter to continue.\n')
pause()

"""## Part 3: Predict for One-Vs-All """
# Compute accuracy on our training set
p = predictOneVsAll(all_theta, X)

# Flatten y to a 1-D array of length m before comparing it with p.
y = y.reshape((m))

# rand_indices here is the value assigned earlier in the script; it is only
# re-drawn further down.
print(y[rand_indices])

# Percentage of predictions that equal the labels.
print("training Set Accuracy:: ", np.multiply(np.mean((p == y).astype(int)), 100), '%')

#  To give you an idea of the network's output, you can also run
#  through the examples one at a time to see what it is predicting.

#  Draw 600 example indices from range(m).
#  NOTE(review): np.random.choice samples with replacement by default, so
#  this is not a permutation and indices may repeat.
rand_indices = np.random.choice(m, 600)

# The loop body continues beyond this view.
for i in range(600):
    # Display
def ex3():
    """Run Exercise 3, Part 1: one-vs-all logistic regression on digits.

    Loads ``ex3data1.mat``, saves a picture of 100 random samples to
    ``figure1.png``, trains the one-vs-all classifier and prints the
    training-set accuracy.  Takes no arguments and returns ``None``.
    """
    ## Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all

    #  Instructions
    #  ------------
    #
    #  This file contains code that helps you get started on the
    #  linear exercise. You will need to complete the following functions
    #  in this exercise:
    #
    #     lrCostFunction.m (logistic regression cost function)
    #     oneVsAll.m
    #     predictOneVsAll.m
    #     predict.m
    #
    #  For this exercise, you will not need to change any code in this file,
    #  or any other files other than those mentioned above.
    #

    ## Initialization
    #clear ; close all; clc

    ## Setup the parameters you will use for this part of the exercise
    input_layer_size  = 400  # 20x20 Input Images of Digits
    num_labels = 10          # 10 labels, from 1 to 10
                             # (note that we have mapped "0" to label 10)

    ## =========== Part 1: Loading and Visualizing Data =============
    #  We start the exercise by first loading and visualizing the dataset.
    #  You will be working with a dataset that contains handwritten digits.
    #

    # Load Training Data
    print('Loading and Visualizing Data ...')

    mat = scipy.io.loadmat('ex3data1.mat')  # training data stored in arrays X, y
    X = mat['X']
    # Flatten the labels and take them modulo 10, so a stored label of 10
    # becomes 0 and the labels used below lie in 0..9.
    y = mat['y'].ravel() % 10
    m = y.size

    # Randomly select 100 distinct data points to display
    rand_indices = np.random.choice(m, 100, replace=False)
    sel = X[rand_indices, :]

    displayData(sel)
    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## ============ Part 2: Vectorize Logistic Regression ============
    #  In this part of the exercise, you will reuse your logistic regression
    #  code from the last exercise. Your task here is to make sure that your
    #  regularized logistic regression implementation is vectorized. After
    #  that, you will implement one-vs-all classification for the handwritten
    #  digit dataset.
    #

    print('\nTraining One-vs-All Logistic Regression...')

    lambda_value = 0.1
    all_theta = oneVsAll(X, y, num_labels, lambda_value)

    # The message is a single-line literal, identical to the one printed
    # after Part 1; a string split across a physical line break is an
    # unterminated literal.
    print('Program paused. Press enter to continue.')
    #pause;

    ## ================ Part 3: Predict for One-Vs-All ================
    #  After ...
    pred = predictOneVsAll(all_theta, X)

    # NOTE(review): comparing ``pred - 1`` with the 0..9 labels assumes
    # predictOneVsAll returns 1-based class indices -- TODO confirm.
    print('\nTraining Set Accuracy: %f' % (np.mean(
        (pred - 1 == y).astype(int)) * 100))
# One-vs-all handwriting recognition driver: train (or load cached)
# parameters, then report the accuracy on the training set.
import readData
from oneVsAll import oneVsAll
import pickle
from predictOneVsAll import predictOneVsAll

# Set to a true value to retrain and rewrite thetas.pickle; otherwise the
# previously saved parameters are loaded from that file.
firstRun = 0

if __name__ == "__main__":
    (X, y) = readData.readData()

    numOfLabels = 10  # number of classes
    lam = 0.1         # regularization parameter

    if firstRun:
        all_theta = oneVsAll(X, y, numOfLabels, lam)
        # Pickle data is bytes: the file must be opened in binary mode
        # (text mode fails on Python 3 and is not portable on Python 2).
        with open('thetas.pickle', 'wb') as f:
            pickle.dump([all_theta], f)
    else:
        # NOTE: unpickling can execute arbitrary code; only load a
        # thetas.pickle that this script produced.
        with open('thetas.pickle', 'rb') as f:
            [all_theta] = pickle.load(f)

    # Multiplied by 100 below, so this is presumably the fraction of
    # correct predictions -- TODO confirm against predictOneVsAll.
    prediction = predictOneVsAll(X, y, all_theta, numOfLabels)

    # Single-argument print(...) prints the same text on Python 2 and is
    # valid syntax on Python 3.
    print("One vs All determines the handwriting correctly on the training set " + str(
        100 * prediction) + "% of the time.")
# Script fragment: ``data`` and ``np`` are defined earlier, outside this
# view.
x = data['X']
y = data['y']

# Bare expressions: they only show a value in an interactive session and
# have no effect when run as a script.
x.shape
y.shape  # .shape reads a matrix's dimensions; shape[0] is the length of the first dimension (the row count)
m = x.shape[0]  # m is the number of samples (the original note says this training set has 500 -- unverified)

##=========================Part2:Visualizing the data=========================
import displayData as Data
# Plot the training samples
Data.displayData(x)

##========================Part3: compute cost function========================

##===========================Part4:gradient descent============================

##==============================Part5: train the classifiers=============================
import onevsall as OVA
K = 10    # passed as the last argument of onevsall; presumably the number of classes -- TODO confirm
lam = 1   # passed as the third argument; presumably the regularization strength -- TODO confirm
# Prepend a column of ones to the samples.
X = np.c_[np.ones(m), x]
theta_ALL = OVA.onevsall(X, y, lam, K)

##===============================Part6:predict================================
import predictOneVsAll as PRE
# NOTE(review): prediction is given ``x`` (without the column of ones)
# while training used ``X`` -- confirm that predictOneVsAll adds the
# column itself.
y_pre = PRE.predictOneVsAll(theta_ALL, x)
# NOTE(review): assumes y_pre and y have the same shape; a (m,) vs (m, 1)
# pair would broadcast to (m, m) -- TODO confirm.
accuracy = np.mean(y_pre == y)
print('accuracy = {0}%'.format(accuracy * 100))