import numpy as np
# sklearn.svm.libsvm is the legacy low-level binding (deprecated and later
# removed from scikit-learn)
from sklearn.svm import libsvm


def dataset3_params(X, y, Xval, yval):
    """Grid-search C and sigma for an RBF-kernel SVM on a validation set."""
    all_C = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    all_sigma = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    best_C = all_C[0]
    best_sigma = all_sigma[0]
    previous_err = 1000.0
    for C in all_C:
        for sigma in all_sigma:
            # libsvm parameterizes the RBF kernel by gamma = 1 / (2 * sigma^2)
            gamma = 1.0 / (2.0 * sigma ** 2)
            model = libsvm.fit(X, y, kernel='rbf', C=C, gamma=gamma)
            predictions = libsvm.predict(
                Xval,
                support=model[0], SV=model[1], nSV=model[2],
                sv_coef=model[3], intercept=model[4], label=model[5],
                probA=model[6], probB=model[7],
                kernel='rbf', gamma=gamma
            )
            err = np.mean(predictions != yval)
            if err < previous_err:
                best_C = C
                best_sigma = sigma
                previous_err = err
    return (best_C, best_sigma)
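# For context, a minimal usage sketch of dataset3_params on the course's
# third dataset; the ex6data3.mat path and its X/y/Xval/yval fields follow
# the pattern of the other snippets here and are assumptions about this
# project's layout.
from scipy.io import loadmat

data = loadmat('../../octave/mlclass-ex6/ex6data3.mat')
X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
y = np.require(data['y'].flatten(), dtype=np.float64)
Xval = np.require(data['Xval'], dtype=np.float64, requirements='C_CONTIGUOUS')
yval = np.require(data['yval'].flatten(), dtype=np.float64)

best_C, best_sigma = dataset3_params(X, y, Xval, yval)
print('Best C: %g, best sigma: %g' % (best_C, best_sigma))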
import numpy as np
from sklearn.svm import libsvm


def train_data(featureMatrix, labels):
    # Need to transform the feature matrix into something scikit-learn
    # recognizes. Try to keep it sparse so that memory utilization stays low.
    # Can we do feature hashing? (What is it and how will it help?)
    # Note: libsvm.fit needs a dense C-ordered float64 array, hence toarray().
    model = libsvm.fit(featureMatrix.toarray(),
                       np.array(labels, dtype='float64'))
    return model
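# To answer the question in the comment above: feature hashing maps feature
# names to a fixed number of columns via a hash function, so no vocabulary
# has to be kept in memory and the output stays sparse. A minimal sketch
# using scikit-learn's FeatureHasher; the dict-of-counts input format is an
# assumption about how the features are produced upstream.
from sklearn.feature_extraction import FeatureHasher

hasher = FeatureHasher(n_features=2 ** 16)
raw_features = [{'free': 2, 'money': 1}, {'meeting': 1, 'agenda': 1}]
hashed = hasher.transform(raw_features)  # scipy.sparse CSR matrix
print(hashed.shape)  # (2, 65536)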
import csv
import math

import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.svm import libsvm


def normaliseData(arr):
    # Zero-mean, unit-variance scaling of a single sample
    mean = arr.mean()
    variance = arr.var()
    return (arr - mean) / math.sqrt(variance)


# The training set and labels are loaded here
X, Y = load_svmlight_file(f='train_raw', n_features=16, multilabel=False,
                          zero_based='auto', query_id=False, dtype=np.float64)
X = X.toarray()

# The training and test features are normalised here
for j in range(0, len(X)):
    X[j] = normaliseData(X[j])
for j in range(0, len(data)):  # `data` is the test matrix, loaded elsewhere
    data[j] = normaliseData(data[j])

# The SVM model is trained here using the libsvm built-in library functions.
# The kernel type is polynomial with degree 4; all parameters are set to
# give the best fit found for this data.
[support, sv, nsv, coeff, intercept, proba, probb, fit_status] = libsvm.fit(
    X, Y, svm_type=0, kernel='poly', degree=4, gamma=0.093, coef0=0,
    tol=0.001, C=1, nu=0.5, max_iter=-1, random_seed=0)
m = [support, sv, nsv, coeff, intercept, proba, probb]
save_model('model.pkl', m)
[support_, sv_, nsv_, coeff_, intercept_, proba_, probb_] = load_model('model.pkl')

# Predictions are made on the test dataset using the hyperparameters
# tuned on the training dataset
dec_values = libsvm.predict(data, support_, sv_, nsv_, coeff_, intercept_,
                            proba_, probb_, svm_type=0, kernel='poly',
                            degree=4, gamma=0.093, coef0=0)
dec_values = dec_values.astype(int)  # astype returns a copy, so assign it

# Predictions are written to a CSV file named result.csv
with open('result.csv', 'w') as csvfile:
    spamwriter = csv.writer(csvfile)
    for j in range(0, len(dec_values)):
        spamwriter.writerow([dec_values[j]])  # one prediction per row
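# save_model and load_model are not defined in this snippet; a minimal
# pickle-based sketch of what they might look like. Only the names match the
# calls above; the implementation is an assumption.
import pickle


def save_model(path, model):
    # Serialize the libsvm model tuple to disk
    with open(path, 'wb') as f:
        pickle.dump(model, f)


def load_model(path):
    # Restore the libsvm model tuple
    with open(path, 'rb') as f:
        return pickle.load(f)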
lsRBF = LabelSpreading(kernel='rbf')
# Note: fit() returns the fitted estimator itself, not predictions
y_pred_lsRBF = lsRBF.fit(mediumTrainingLst_input, mediumTrainingLst_target)

#%%
lsKNNC = LabelSpreading(kernel='knn')
y_pred_lsKNNC = lsKNNC.fit(mediumClusterTrainingLst_input,
                           mediumClusterTrainingLst_target)

lsRBFC = LabelSpreading(kernel='rbf')
y_pred_lsRBFC = lsRBFC.fit(mediumClusterTrainingLst_input,
                           mediumClusterTrainingLst_target)

#%%
'''
Libsvm
'''
# libsvm is a module of functions, not a class, so it is not instantiated;
# fit() returns the model tuple rather than predictions
#lsvm = libsvm()
y_pred_lsvm = libsvm.fit(np.array(smallTrainingLst_input),
                         np.array(smallTrainingLst_target))

#%%
def svmScore(predictLst, targetLst):
    # Fraction of predictions that match the targets (accuracy)
    correctCount = 0
    for i in range(len(predictLst)):
        if predictLst[i] == targetLst[i]:
            correctCount += 1
    return correctCount / len(predictLst)


print('Testing with training data:')
#print('Gaussian Naive Bayes, 100% Data: ', end = '')
#print(gnb.score(trainingLst_input[:100], trainingLst_target[:100]))
#print('Multinomial Naive Bayes, 100% Data: ', end = '')
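# svmScore recomputes plain accuracy, which scikit-learn already ships as
# accuracy_score; a quick sanity check with toy lists standing in for real
# predictions:
from sklearn.metrics import accuracy_score

toy_predictions = [1, 0, 1, 1]
toy_targets = [1, 0, 0, 1]
assert svmScore(toy_predictions, toy_targets) == accuracy_score(
    toy_targets, toy_predictions)  # both give 0.75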
def build(self, x_train, y_train, path, **parameter):
    # DataFrame.as_matrix() was removed from pandas; to_numpy() is the
    # current equivalent. libsvm.fit expects C-ordered float64 arrays.
    x = x_train.to_numpy()
    x = x.copy(order='C').astype(np.float64)
    y = y_train.to_numpy().astype(np.float64)
    self.model = libsvm.fit(x, y, **parameter)
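# A hypothetical call of build(); the Model class name, the toy DataFrame,
# and the path value are assumptions. Keyword arguments pass straight
# through to libsvm.fit.
import numpy as np
import pandas as pd

x_train = pd.DataFrame({'f0': [0.0, 1.0, 2.0, 3.0],
                        'f1': [1.0, 0.0, 1.0, 0.0]})
y_train = pd.Series([0.0, 0.0, 1.0, 1.0])

wrapper = Model()  # hypothetical class that defines build()
wrapper.build(x_train, y_train, path='model.bin', kernel='rbf', C=1.0)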
if __name__ == '__main__':
    file_contents = open('../../octave/mlclass-ex6/emailSample1.txt', 'r').read()
    vocabulary = get_vocabulary()
    word_indices = process_email(file_contents, vocabulary)
    print('Word indices:\n%s' % word_indices)

    features = email_features(word_indices, vocabulary)
    print('Length of feature vector: %d' % len(features))
    print('Number of non-zero entries: %d' % sum(features > 0))

    # train SVM for spam classification
    data = loadmat('../../octave/mlclass-ex6/spamTrain.mat')
    X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
    y = np.require(data['y'].flatten(), dtype=np.float64)

    C = 0.1
    model = libsvm.fit(X, y, kernel='linear', C=C)
    predictions = libsvm.predict(
        X,
        support=model[0], SV=model[1], nSV=model[2],
        sv_coef=model[3], intercept=model[4], label=model[5],
        probA=model[6], probB=model[7],
        kernel='linear',
    )
    accuracy = 100 * np.mean(predictions == y)
    print('Training set accuracy: %0.2f %%' % accuracy)

    # load test set
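# process_email, email_features, and get_vocabulary are defined elsewhere in
# this file; for reference, a minimal sketch of what email_features likely
# does under the exercise's convention (a binary indicator per vocabulary
# word, 1899 words in the course vocabulary). The _sketch suffix marks it as
# hypothetical, not the file's actual implementation.
def email_features_sketch(word_indices, vocabulary):
    # Entry i is 1 if vocabulary word i appears in the email
    features = np.zeros(len(vocabulary))
    for idx in word_indices:
        features[idx] = 1
    return features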
# Each original email was processed with the processEmail and emailFeatures
# functions and converted into a feature vector x(i) of size 1899
# (4000x1899 for spamTrain.mat, 1000x1899 for spamTest.mat)
svmtrain = loadmat(
    r'D:\ML\ML\CSR ML\WEEK#7\Machine Learning Assignment#6\Python\spamTrain.mat'
)
X = np.require(svmtrain['X'], dtype=np.float64, requirements='C')  # 4000x1899
#print(X.flags)
y = np.require(svmtrain['y'].flatten(), dtype=np.float64)  # 4000x1
print('Training Linear SVM (Spam Classification)')
print('(this may take 1 to 2 minutes) ...')

C = 0.1
model = libsvm.fit(X, y, kernel='linear', C=C)
p = libsvm.predict(X, support=model[0], SV=model[1], nSV=model[2],
                   sv_coef=model[3], intercept=model[4],
                   probA=model[5], probB=model[6], kernel='linear')
accuracyTrain = np.mean(p == y) * 100
print('Training accuracy', accuracyTrain)

# Test Spam Classification
svmtest = loadmat(
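# Hypothetical continuation: the snippet breaks off while loading the test
# set. spamTest.mat is assumed to hold Xtest/ytest as in the course data;
# the path (cut off above) is left elided.
svmtest = loadmat(r'...\spamTest.mat')  # 1000x1899 / 1000x1
Xtest = np.require(svmtest['Xtest'], dtype=np.float64, requirements='C')
ytest = np.require(svmtest['ytest'].flatten(), dtype=np.float64)
ptest = libsvm.predict(Xtest, support=model[0], SV=model[1], nSV=model[2],
                       sv_coef=model[3], intercept=model[4],
                       probA=model[5], probB=model[6], kernel='linear')
print('Test accuracy', np.mean(ptest == ytest) * 100)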
            # ... tail of dataset3_params (shown in full in the first
            # snippet): keep the (C, sigma) pair with the lowest
            # validation error
            err = np.mean(predictions != yval)
            if err < previous_err:
                best_C = C
                best_sigma = sigma
                previous_err = err
    return (best_C, best_sigma)


if __name__ == '__main__':
    data = loadmat('../../octave/mlclass-ex6/ex6data1.mat')
    X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
    y = np.require(data['y'].flatten(), dtype=np.float64)
    plot_data(X, y)

    print('Training Linear SVM ...')
    C = 1.0
    model = libsvm.fit(X, y, kernel='linear', tol=0.001, C=C, max_iter=20)
    visualize_boundary_linear(X, y, model)

    # evaluate gaussian kernel
    x1 = np.array([1.0, 2.0, 1.0])
    x2 = np.array([0.0, 4.0, -1.0])
    sigma = 2.0
    value = gaussian_kernel(x1, x2, sigma)
    print('Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], '
          'sigma = 2: %f' % value)
    print('(this value should be about 0.324652)')

    # dataset 2
    data = loadmat('../../octave/mlclass-ex6/ex6data2.mat')
    X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
    y = np.require(data['y'].flatten(), dtype=np.float64)
    plot_data(X, y)

    print('Training SVM with RBF Kernel ...')
    C = 1.0
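# gaussian_kernel is not defined in this fragment; a minimal sketch that
# matches the expected output above: exp(-||x1 - x2||^2 / (2 * sigma^2))
# evaluates to about 0.324652 for these inputs. The _sketch suffix marks it
# as hypothetical, not the file's actual implementation.
def gaussian_kernel_sketch(x1, x2, sigma):
    diff = x1 - x2
    return np.exp(-diff.dot(diff) / (2.0 * sigma ** 2))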