def dataset3Params(X, y, Xval, yval):
    """Pick C and sigma for the Gaussian-kernel SVM by grid search.

    Every (C, sigma) pair drawn from a fixed candidate list is used to train
    a model on (X, y) with ``utils.svmTrain``; the pair whose predictions on
    Xval disagree least often with yval is returned.

    Parameters
    ----------
    X, y
        Training examples and labels, forwarded to ``utils.svmTrain``.
    Xval, yval
        Validation examples and labels used to score each candidate pair.

    Returns
    -------
    c_final, sigma_final
        The candidate values with the lowest validation error. The defaults
        (1, 0.3) are returned if no candidate ever compares lower than the
        initial error.
    """
    c_final = 1
    sigma_final = 0.3
    test_vals = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    lowest_error = np.inf

    for c in test_vals:
        for s in test_vals:
            # sigma has to travel through `args` as a one-element tuple.
            # `(s)` is just `s`, and passing it positionally would land in
            # the slot that other calls in this file fill with 1e-3 instead
            # of reaching the kernel.
            model = utils.svmTrain(X, y, c, gaussianKernel, args=(s,))
            predictions = utils.svmPredict(model, Xval)
            error = np.mean(predictions != yval)
            if error < lowest_error:
                lowest_error = error
                c_final = c
                sigma_final = s

    return c_final, sigma_final
def dataset3Params(X, y, Xval, yval):
    """Grid-search C and sigma for the Gaussian-kernel SVM.

    Each (C, sigma) combination from the candidate list is trained on
    (X, y); the combination with the smallest misclassification rate on the
    cross validation set (Xval, yval) is returned.

    Returns
    -------
    C, sigma
        Best candidate pair, or the defaults (1, 0.3) if no candidate's
        error ever compares below the initial threshold.
    """
    candidates = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    # Defaults stay in place unless a candidate beats the threshold below.
    C, sigma = 1, 0.3
    lowest = 10000

    for c_try in candidates:
        for sigma_try in candidates:
            model = utils.svmTrain(
                X, y, c_try, gaussianKernel, args=(sigma_try, ))
            val_error = np.mean(utils.svmPredict(model, Xval) != yval)
            if val_error < lowest:
                lowest, C, sigma = val_error, c_try, sigma_try

    return C, sigma
def dataset3Params(X, y, Xval, yval):
    """Pick C and sigma for the Gaussian-kernel SVM by validation accuracy.

    Trains one model per (C, sigma) pair from a fixed grid using
    ``utils.svmTrain`` and keeps the pair whose predictions on Xval match
    yval most often.

    Parameters
    ----------
    X, y
        Training examples and labels.
    Xval, yval
        Validation examples and labels.

    Returns
    -------
    C, sigma
        The grid values with the highest validation accuracy; the first pair
        reaching that accuracy wins ties.
    """
    grid_list = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    # Start from the first grid point with a score below any real accuracy,
    # so the function can never hand back the unusable pair (0, 0) when every
    # candidate scores exactly 0.
    C = grid_list[0]
    sigma = grid_list[0]
    correctness = -1.0

    for C_i in grid_list:
        for sigma_i in grid_list:
            model_i = utils.svmTrain(X, y, C_i, gaussianKernel,
                                     args=(sigma_i, ))
            predictions_i = utils.svmPredict(model_i, Xval)
            correctness_i = np.mean(predictions_i == yval)
            if correctness_i > correctness:
                C = C_i
                sigma = sigma_i
                correctness = correctness_i
                # NOTE(review): progress trace emitted on every improvement;
                # confirm this is the intended place for it.
                print(C, sigma, correctness_i)

    return C, sigma
def dataset3Params(X, y, Xval, yval):
    """Grid-search C and sigma for the Gaussian-kernel SVM.

    Trains a model on (X, y) for every (C, sigma) pair of the candidate
    values and returns the pair with the lowest misclassification rate on
    (Xval, yval).

    Parameters
    ----------
    X, y
        Training examples and labels.
    Xval, yval
        Validation examples and labels.

    Returns
    -------
    C, sigma
        Best candidate pair. The defaults (1, 0.3) are returned if no
        candidate's error ever compares lower than the initial value.
    """
    # Defaults double as the fallback result, so the return statement can
    # never hit an unbound name.
    C = 1
    sigma = 0.3

    # Lists, not sets: the search order (and therefore which pair wins a tie)
    # must follow the written order instead of hash order.
    c_candidates = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    sigma_candidates = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    best_error = np.inf
    for c_try in c_candidates:
        for sigma_try in sigma_candidates:
            model = utils.svmTrain(X, y, c_try, gaussianKernel,
                                   args=(sigma_try, ))
            predictions = utils.svmPredict(model, Xval)
            current_error = np.mean(predictions != yval)
            if current_error < best_error:
                best_error = current_error
                C = c_try
                sigma = sigma_try

    return C, sigma
def dataset3Params(X, y, Xval, yval):
    """Grid-search C and sigma for the Gaussian-kernel SVM.

    Records the validation error of every (C, sigma) pair from a fixed list
    of candidate values and returns the pair with the smallest error.

    Parameters
    ----------
    X, y
        Training examples and labels.
    Xval, yval
        Validation examples and labels.

    Returns
    -------
    C, sigma
        The pair with the lowest ``np.mean(predictions != yval)``; the
        earliest pair in search order wins ties.
    """
    number_vec = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    predicts = []
    for C in number_vec:
        for sigma in number_vec:
            model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma, ))
            predictions = utils.svmPredict(model, Xval)
            err_cv = np.mean(predictions != yval)
            predicts.append((C, sigma, err_cv))

    # Only the single best entry is needed, so take the minimum directly
    # instead of sorting the whole list; min() keeps the first of equal keys.
    best_C, best_sigma, _ = min(predicts, key=lambda tup: tup[2])
    return best_C, best_sigma
def dataset3Params(X, y, Xval, yval):
    """
    Returns your choice of C and sigma for Part 3 of the exercise
    where you select the optimal (C, sigma) learning parameters to use for SVM
    with RBF kernel.

    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples,
        and n is the number of features.

    y : array_like
        (m, ) vector of labels for the training data.

    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation
        examples and n is the number of features

    yval : array_like
        (mv, ) vector of labels for the validation data.

    Returns
    -------
    C, sigma : float, float
        The best performing values for the regularization parameter C and
        RBF parameter sigma.

    Note
    ----
    The prediction error of each candidate is computed as
    np.mean(predictions != yval).
    """
    # Range of C and sigma values to be tested.
    C_list = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    sigma_list = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    # Start from the first grid point with an infinite error. The first loop
    # iteration evaluates that same point, so no separate warm-up training
    # run is needed to seed best_error.
    best_C = C_list[0]
    best_sigma = sigma_list[0]
    best_error = np.inf

    # Train on every (C, sigma) combination and keep the one with the
    # lowest validation error.
    for C in C_list:
        for sigma in sigma_list:
            model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma,))
            predictions = utils.svmPredict(model, Xval)
            error = np.mean(predictions != yval)
            if error < best_error:
                best_error = error
                best_C = C
                best_sigma = sigma

    return best_C, best_sigma
# -------------------------- Testing Gaussian Kernel --------------------------------------
# Read ex6data1; the loaded dict exposes the examples under 'X' and the
# labels under 'y' (first column taken as a flat vector).
data = loadmat(os.path.join('Data', 'ex6data1.mat'))
X = data['X']
y = data['y'][:, 0]

# Scatter plot of the training examples.
utils.plotData(X, y)
#pyplot.show()

# Linear-kernel SVM; vary C (e.g. C = 1000) to see how the decision
# boundary moves.
C = 1
model = utils.svmTrain(X, y, C, utils.linearKernel, 1e-3, 20)
utils.visualizeBoundaryLinear(X, y, model)
#pyplot.show()

# Spot-check gaussianKernel on a fixed pair of vectors.
x1 = np.array([1, 2, 1])
x2 = np.array([0, 4, -1])
sigma = 2
sim = gaussianKernel(x1, x2, sigma)

kernel_report = (
    'Gaussian Kernel between x1 = [1, 2, 1], x2 = [0, 4, -1], sigma = %0.2f:'
    '\n\t%f\n(for sigma = 2, this value should be about 0.324652)\n'
)
print(kernel_report % (sigma, sim))

# Register the kernel with the grader as part 1 and run the grading.
grader[1] = gaussianKernel
grader.grade()
def dataset3Params(X, y, Xval, yval):
    """
    Choose the (C, sigma) pair for the RBF-kernel SVM of Part 3 by
    exhaustive search over a fixed grid.

    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples,
        and n is the number of features.

    y : array_like
        (m, ) vector of labels for the training data.

    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation
        examples and n is the number of features

    yval : array_like
        (mv, ) vector of labels for the validation data.

    Returns
    -------
    C, sigma : float, float
        The grid values whose model has the lowest validation error,
        measured as np.mean(predictions != yval).
    """
    C_grid = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
    sigma_grid = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])

    # errors[r, c] holds the validation error for C_grid[r], sigma_grid[c].
    errors = np.zeros([C_grid.size, sigma_grid.size])
    for row, c_try in enumerate(C_grid):
        for col, sigma_try in enumerate(sigma_grid):
            model = utils.svmTrain(X, y, c_try, gaussianKernel,
                                   args=(sigma_try, ))
            predictions = utils.svmPredict(model, Xval)
            errors[row, col] = np.mean(predictions != yval)

    # Turn the flat position of the smallest error back into grid indices.
    best_row, best_col = np.unravel_index(
        np.argmin(errors, axis=None), errors.shape)
    return C_grid[best_row], sigma_grid[best_col]
features = emailFeatures(word_indices)

# Report the size of the feature vector and how many entries are positive.
print('\nLength of feature vector: %d' % len(features))
print('Number of non-zero entries: %d' % sum(features > 0))

# Spam training set: examples under 'X' (cast to float), labels under 'y'.
data = loadmat(os.path.join('Data', 'spamTrain.mat'))
X = data['X'].astype(float)
y = data['y'][:, 0]

print('Training Linear SVM (Spam Classification)')
print('This may take 1 to 2 minutes ...\n')

C = 0.1
model = utils.svmTrain(X, y, C, utils.linearKernel)

# Accuracy on the data the model was trained on, as a percentage.
p = utils.svmPredict(model, X)
train_accuracy = np.mean(p == y) * 100
print('Training Accuracy: %.2f' % train_accuracy)

# Spam test set: examples under 'Xtest', labels under 'ytest'.
data = loadmat(os.path.join('Data', 'spamTest.mat'))
Xtest = data['Xtest'].astype(float)
ytest = data['ytest'][:, 0]

print('Evaluating the trained Linear SVM on a test set ...')
p = utils.svmPredict(model, Xtest)
test_accuracy = np.mean(p == ytest) * 100
print('Test Accuracy: %.2f' % test_accuracy)
def dataset3Params(X, y, Xval, yval):
    """
    Select the (C, sigma) learning parameters for the RBF-kernel SVM of
    Part 3 using the cross validation set.

    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples,
        and n is the number of features.

    y : array_like
        (m, ) vector of labels for the training data.

    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation
        examples and n is the number of features

    yval : array_like
        (mv, ) vector of labels for the validation data.

    Returns
    -------
    C, sigma : float, float
        The candidate pair with the lowest prediction error
        np.mean(predictions != yval); the defaults (1, 0.3) are kept if no
        candidate's error compares below the initial threshold.
    """
    grid = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    C, sigma = 1, 0.3
    # Threshold no real error rate can reach, so the first candidate wins.
    error = 10**6

    for c_try in grid:
        for sigma_try in grid:
            model = utils.svmTrain(X, y, c_try, gaussianKernel,
                                   args=(sigma_try, ))
            candidate_error = np.mean(utils.svmPredict(model, Xval) != yval)
            if candidate_error < error:
                error, C, sigma = candidate_error, c_try, sigma_try

    return C, sigma
from scipy.io import loadmat
import utils
from scipy import optimize
import re

###===================Part 1.1: Load and Visualizing Data ============================
data = loadmat(
    "D:/TJH/ML03/machine-learning-ex6/machine-learning-ex6/ex6/ex6data1.mat")
X = data["X"]
y = data["y"].ravel()
# utils.plotData(X,y)
# plt.show()

# Linear-kernel SVM on the first data set.
C = 1
# C = 100
model = utils.svmTrain(X, y, C, utils.linearKernel, 1e-3, 20)
# utils.visualizeBoundaryLinear(X,y,model)
# plt.show()


###===================Part 1.2.1: SVM with Gaussian Kernels ============================
def gaussianKernel(x1, x2, sigma):
    """Return the Gaussian (RBF) similarity between x1 and x2.

    Computes exp(-||x1 - x2||^2 / (2 * sigma^2)), where the norm is
    ``np.linalg.norm`` of the difference.
    """
    distance = np.linalg.norm(x1 - x2)
    exponent = -distance**2 / 2 / (sigma**2)
    return np.exp(exponent)
return features_vector data = loadmat(os.path.join('Data', 'spamTrain.mat')) X = data["X"] y = data["y"] y = y.flatten() print(y[0]) tam = X[0] print(tam) # sss = X[:5] sss_y = y[:5] print(sss.shape) # Train the SVM model = utils.svmTrain(X, y, 3, gaussianKernel, args=(0.1, )) # model = utils.svmTrain(X, y, 3, utils.linearKernel, args=(0.01,)) # predictions = utils.svmPredict(model, X) # err_train = np.mean(predictions == y) * 100 # print('Train Accuracy: {} % '.format(err_train)) # data = loadmat(os.path.join('Data', 'spamTest.mat')) # Xtest = data["Xtest"] # ytest = data["ytest"] # ytest = ytest.flatten() # predictions_test = utils.svmPredict(model, Xtest) # err_test = np.mean(predictions_test == ytest) * 100 # print('Test Accuracy: {} % '.format(err_test))