def main(filename='data/iris-virginica.txt', C=1.0, kernel_type='linear', epsilon=0.001):
    # Load data
    (data, _) = readData('%s/%s' % (filepath, filename), header=False)
    data = data.astype(float)

    # Split data
    X, y = data[:, 0:-1], data[:, -1].astype(int)

    # Initialize model
    model = SVM()

    # Fit model
    support_vectors, iterations = model.fit(X, y)

    # Support vector count
    sv_count = support_vectors.shape[0]

    # Make prediction
    y_hat = model.predict(X)

    # Calculate accuracy
    acc = calc_acc(y, y_hat)

    print("Support vector count: %d" % (sv_count))
    print("bias:\t\t%.3f" % (model.b))
    print("w:\t\t" + str(model.w))
    print("accuracy:\t%.3f" % (acc))
    print("Converged after %d iterations" % (iterations))
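# calc_acc is used by several of these drivers but not shown here; a minimal
# sketch, assuming y and y_hat are equal-length 1-D label arrays:
import numpy as np

def calc_acc(y, y_hat):
    # Fraction of predictions that exactly match the true labels
    return np.mean(np.asarray(y).ravel() == np.asarray(y_hat).ravel())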
def main(C=1.0, epsilon=0.001):
    # Load the iris data and binarize: the chosen class is labeled +1, everything else -1
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    class_chosen = 1  # only this class is treated as the positive class
    y = np.asarray([-1 if y[i] != class_chosen else 1 for i in range(y.shape[0])])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize model
    model = SVM(X_train, y_train, C=C, tolerance=epsilon)

    # Fit model
    support_vectors, iterations = model.fit()

    # Support vector count
    sv_count = support_vectors.shape[0]

    # Make prediction
    y_hat = model.predict(X_test)
    # print(y_hat.shape, y_test.shape)

    # Calculate accuracy
    acc = calc_acc(y_test, y_hat)

    print("Support vector count: %d" % (sv_count))
    # print("bias:\t\t%.3f" % (model.b))
    # print("w:\t\t" + str(model.w))
    print("accuracy:\t%.3f" % (acc))
    print("Converged after %d iterations" % (iterations))
def svmTest(feature_len, all_lines, all_features, all_labels):
    counts = {}
    for i in range(10):
        rate = 0
        print("Test %d:" % (i + 1))
        # First 80% for training, last 20% for testing
        train_features = all_features[0:int(0.8 * len(all_features))]
        train_labels = all_labels[0:int(0.8 * len(all_features))]
        test_features = all_features[int(0.8 * len(all_features)):]
        test_labels = all_labels[int(0.8 * len(all_features)):]
        length = len(test_labels)
        for C in range(50, 61, 1):
            rate = 0
            new_svm = SVM(train_features, train_labels, C=C, function='RBF', d=0.53)
            # print("Train:")
            new_svm.train()
            # print("\nPredict:", end="\n")
            for j in range(0, length):
                res = new_svm.predict(test_features[j])
                if res == test_labels[j]:
                    rate += 1
            print("C = %f: " % C, end=" ")
            print(rate / length)
            if C not in counts:
                counts[C] = rate / length
            else:
                counts[C] += rate / length
        # Re-extract features for the next round
        all_features, all_labels = now_provider.getFeatureAndLabel(all_lines, feature_len)
    for x, y in counts.items():
        print(x, y)
def cross_validation(x_train, y_train, C, gamma):
    model = SVM(C=C, kernel='rbf', gamma=gamma, tol=1e-2)
    # Split the training data into 5 folds of 160 samples each
    cross = lambda arr, sz: [arr[i:i + sz] for i in range(0, len(arr), sz)]
    x_cross_val = np.array(cross(x_train, 160))
    y_cross_val = np.array(cross(y_train, 160))
    indices = np.array(range(5))
    score = 0
    for i in range(5):
        curr_indices = np.delete(indices, i)
        x_curr_valid = x_cross_val[i]
        y_curr_valid = y_cross_val[i]
        x_curr_train = np.vstack(x_cross_val[curr_indices])
        y_curr_train = y_cross_val[curr_indices].ravel()
        model.fit(x_curr_train, y_curr_train)
        model.number_support_vectors()
        y_curr_valid_predict = model.predict(x_curr_valid, x_curr_train, y_curr_train)
        curr_score = model.score_error(y_curr_valid_predict, y_curr_valid)
        print("i =", i, ". Score error =", curr_score)
        score += curr_score
    print("Average score:", score / 5)
    return score / 5
def real_data_train():
    x, y = create_array_real_data()
    shuffle_index = np.random.permutation(len(y))
    x = x[shuffle_index]
    y = y[shuffle_index]
    # 1000 elements: 800 for training, 200 for testing
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # Grid search over C and gamma (kept for reference):
    # C = [1, 10]
    # gamma = [0.01, 0.1, 0.5, 1.0]
    # average_error = np.zeros((len(C), len(gamma)))
    # for i in range(len(C)):
    #     for j in range(len(gamma)):
    #         print("Cross-validation for parameters C = ", C[i], ", gamma = ", gamma[j])
    #         average_error[i][j] = cross_validation(x_train, y_train, C=C[i], gamma=gamma[j])
    # The search finds C = 1, gamma = 0.01

    print("Create model C = ", 1, ", gamma = ", 0.01)
    model = SVM(C=1, kernel='rbf', gamma=0.01, tol=1e-2)
    print("Fit model with train sequence")
    model.fit(x_train, y_train)
    model.number_support_vectors()
    print("Predict model on test sequence")
    y_test_predict = model.predict(x_test, x_train, y_train)
    score = model.score_error(y_test_predict, y_test)
    print("Score error = ", score)
def build(kernel, metric, keys_limit, svm_C, logs):
    # Load the Arcene train / validation splits
    trainX = genfromtxt('input/arcene_train.data', delimiter=' ')
    trainY = genfromtxt('input/arcene_train.labels', delimiter=' ')
    validX = genfromtxt('input/arcene_valid.data', delimiter=' ')
    validY = genfromtxt('input/arcene_valid.labels', delimiter=' ')

    # Feature selection: keep only the top `keys_limit` features chosen by the metric
    keys = metric.build(trainX.transpose(), trainY, logs=logs, limit=keys_limit)
    tX = []
    for x in trainX:
        tX.append(np.take(x, keys))
    tX = np.array(tX)

    clf = SVM(kernel=kernel.kernel, C=svm_C)
    clf.fit(tX, trainY)

    # Project the validation set onto the same selected features
    vX = []
    for x in validX:
        vX.append(np.take(x, keys))
    vX = np.array(vX)

    predict_arr = [clf.predict(x) for x in vX]
    confusion_matrix = Statistic.get_metrics(predict_arr, validY)
    f_measure = Statistic.get_f_measure(confusion_matrix)
    return keys, confusion_matrix, f_measure
def test_image():
    X, y = get_image_data()
    # Prepend a bias column of ones
    X = np.column_stack([[1] * X.shape[0], X])
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.2, random_state=np.random.RandomState(42))
    clf = SVM()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    correct_rate = 1 - np.mean(y_test != y_pred)
    print('correct_rate:', correct_rate)
def test_simple(self):
    X = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0],
                  [1.0, 1.0], [2.4, 2.4], [2.6, 2.6], [4.0, 4.0]])
    y = np.array([0.0, 0.0, 1.0, 1.0])
    K = self.gram_matrix(X, kernels.linear)
    svm = SVM(kernels.linear, 1.0, K)
    svm.fit(np.arange(4), y)
    result = svm.predict(np.arange(4, 8))
    np.testing.assert_allclose(result, [0, 0, 1, 1])
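# The test above indexes into a precomputed kernel (Gram) matrix instead of
# passing raw samples to fit/predict. A minimal sketch of such a helper,
# assuming `kernel` takes two 1-D sample vectors (hypothetical signature):
def gram_matrix(X, kernel):
    n = X.shape[0]
    K = np.empty((n, n))
    for i in range(n):
        for j in range(n):
            # Pairwise kernel value between sample i and sample j
            K[i, j] = kernel(X[i], X[j])
    return K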
def main():
    dataset_dir = '../data/student-mat.csv'
    select_col = ['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu',
                  'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime', 'studytime', 'failures',
                  'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet',
                  'romantic', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health',
                  'absences']  # 'G1', 'G2'
    # Only the two grade columns are actually used as features
    select_col = ['G1', 'G2']
    train_x, train_y, test_x, test_y = data_loader(dataset_dir, select_col=select_col)
    model = SVM()
    model.fit(train_x, train_y)
    predict_y = model.predict(test_x)
    result = evaluate(test_y, predict_y)
    print(result)
def test_multi():
    X, y = get_multi_data()
    # Prepend a bias column of ones
    X = np.column_stack([[1] * X.shape[0], X])
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.2, random_state=np.random.RandomState(42))
    clf = SVM()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    correct_rate = 1 - np.mean(y_test != y_pred)
    print('correct_rate:', correct_rate)
    plot_samples(X, y)
    print(clf.w)
    for w in clf.w:
        plot_line(X[:, 1:], w)
def auto_get_parameter(X_train, y_train, X_val, y_val):
    learning_rates = [1e-7, 5e-5]
    regularization_strength = [5e4, 1e5]
    best_parameter = None
    best_val = -1
    for i in learning_rates:
        for j in regularization_strength:
            svm = SVM()
            y_pred = svm.predict(X_train, y_train, j, 1, 200, 1500, True)
            acc_val = np.mean(y_val == y_pred)
            if best_val < acc_val:
                best_val = acc_val
                best_parameter = (i, j)
    print('Best validation accuracy achieved during parameter search: %f' % best_val)
    return best_parameter
def main(filename='iris-virginica.txt', C=1.0, kernel_type='linear', epsilon=0.001):
    # Load data
    # (data, _) = readData('%s/%s' % (filepath, filename), header=False)
    # data = data.astype(float)
    data = pd.read_excel("C:/Users/Niku/Documents/dataset/arrays.xlsx")
    print(data.shape)

    # Use the first 1500 rows; column 35 holds the label, columns 0..34 the features
    X = data[0:1500]
    X = np.array(X)
    y = X[:, 35]
    X = X[:, 0:35]
    print(X.shape)
    y = np.array(y)
    y[y == 0] = -1  # map the 0 class to -1 for the SVM
    y1 = np.matrix(y)
    print(y.shape, X.shape, y1.shape)

    # Initialize model
    model = SVM()

    # Fit model
    support_vectors, iterations = model.fit(X, y)

    # Support vector count
    sv_count = support_vectors.shape[0]

    # Make prediction
    y_hat = model.predict(X)

    # Calculate accuracy
    acc = calc_acc(y, y_hat)

    print("Support vector count: %d" % (sv_count))
    print("bias:\t\t%.3f" % (model.b))
    print("w:\t\t" + str(model.w))
    print("accuracy:\t%.3f" % (acc))
    print("Converged after %d iterations" % (iterations))
def best_params():
    # Grid search over learning rate and regularization strength
    lr_list = [0.1, 0.01, 0.05, 0.001, 0.005, 0.0001, 0.0005]
    lambda_list = [0.1, 0.01, 0.05, 0.001, 0.005, 0.0001, 0.0005]
    acc_max = 0
    lr_max = 0
    lamda_max = 0
    for lr_val in lr_list:
        for lmda in lambda_list:
            clf = SVM(lr=lr_val, lamda=lmda)
            clf.fit(X_train, Y_train)
            predictions = clf.predict(X_test)
            acc = accuracy(Y_test, predictions)
            if acc > acc_max:
                acc_max = acc
                lr_max = lr_val
                lamda_max = lmda
    return (lr_max, lamda_max, acc_max)
def main(filename='data/data_banknote_authentication.txt', C=1.0, kernel_type='linear', epsilon=0.001):
    # Load data
    (data, _) = readData('%s/%s' % (filepath, filename), header=False)
    data = data.astype(float)
    np.random.shuffle(data)  # shuffle the rows randomly

    train_data = data[:int((len(data) + 1) * .80)]  # 80% of the data goes to the training set
    test_data = data[int(len(data) * .80 + 1):]     # remaining 20% goes to the test set

    # Split features / labels
    X, y = train_data[:, 0:-1], train_data[:, -1].astype(int)    # for training
    X1, y1 = test_data[:, 0:-1], test_data[:, -1].astype(int)    # for testing

    # Initialize model
    model = SVM()

    # Fit model on the training data
    support_vectors, iterations = model.fit(X, y)

    # Support vector count
    sv_count = support_vectors.shape[0]

    # Make prediction on the test data
    y_hat = model.predict(X1)

    # Calculate accuracy
    acc = calc_acc(y1, y_hat)

    print("Support vector count: %d" % (sv_count))
    print("bias:\t\t%.3f" % (model.b))
    print("w:\t\t" + str(model.w))
    print("accuracy:\t%.3f" % (acc))
    print("Converged after %d iterations" % (iterations))
def test_binary():
    X, y = get_binary_data()
    y[y == 0] = -1
    # X = np.column_stack([[1]*X.shape[0], X])
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.2, random_state=np.random.RandomState(42))
    plot_samples(X_train, y_train)
    plt.show()
    C = [0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3]
    for c in C:
        clf = SVM(C=c)
        clf.fit(X_train, y_train)
        # y_pred = clf.predict(X_test)
        # correct_rate = 1 - np.mean(y_test != y_pred)
        # print('correct_rate:', correct_rate)
        print(clf.b, clf.w)
        pre = clf.predict(X_test)
        print('correct:', np.mean((pre == y_test).astype(int)))
        plot_samples(X_train, y_train, clf.alpha, c)
        plot_line(X_train, [clf.b, clf.w[0], clf.w[1]])
def test_regression__SVM_predicts_correctly(self): # Initialises the SVM with the actual default data, so it can make an accurate prediction s = SVM(self.seed_csv, "DEFAULT_VECTORS.csv") test_vec = [] true_class = [] # Tests the predictions on 4 test vectors, each from a different class test_vec.append([0,0,1.31162,1.24726,0.4816,0,0,0.73234,0,0,0.81521,0.06224,0,0,0.22633,0.37036,0,0,1.53459,0.62784,0,0.60669,0.49076,0,0,0,0.30922,0,0.54016,0.27795,0,0,1.98356,0,1.21315,0,0,0,0,0,1.97012,0,0,0,0.63165,0,1.59337,0.22264,0,0,0,0,3.09712,0,0.76984,0,0.50522,0,0,0,0,1.10319,0,0.07691,0.07841,0.54772,2.11118,0,0,0,0.50572,1.42813,0.58463,0,0,0,0,0,0,0,1.63526,0.66817,0,0,0,2.24559,0,0.12274,0.53853,0,0.16904,0.59593,0.37706,0,0,0,0,0,0,0,0.18247,0,0,0,0.21419,0,0,0,3.39588,1.592,0.22454,0,0,0,1.40007,0,2.63516,0,0,0,0,3.88263,0,0,0,0,0,0,0.49433,0.54541,0.81915,0.02762,0.17239,2.37652,0,0,0,0,0,0,0.22927,0,0,0.02774,2.07901,2.15474,0.66015,0,1.26529,0,0,0,0,0.74042,0.82206,0,0,0.47257,0,0,0.60453,0,0,0,0,0,0,0.62863,0,0,0,2.86534,0.35805,0,0.96064,0.41109,0,0,0,2.25517,0,1.61856,0,0,1.65731,0,0,1.29864,0,3.07959,0,2.39562,0.35225,0,0,0,0.62533,0.66331,4.34328,0,0,3.29736,0,0.17106,0,0,0,0,0,0,0,0,1.21999,0,0,0,0,0,0,1.50995,2.5842,0,0,0,0.24641,0.70289,0,0,1.6242,0,3.52911,0,0,0,0.36339,2.81645,0.28324,0,0,0.83023,0,0,0,0,0.92336,0,0.30198,0.04515,0.34268,0,0,0.44364,0,0,0,0,0,0,0,1.37224,0.2919,0,0.31126,0,2.21684,0,0,0.88388,0,0,0,0,0.26138,0.32636,0,0,0,0.17237,0,0.25218,0.14953,0,0.20786,0.27553,0,0,0,0,0,0,1.40562,0,0,0.25112,0.15636,0,0.24745,0,0,1.61772,0,0.37021,1.98729,0,0,0,0,0,0,0.8953,0,0,0,0,0,0,0,0,0,0.40845,0.49982,0.10021,0,0.6387,0,1.805,0,0,0,0,1.59367,0,0,0.06518,0,0,0.67792,0.24059,0,0.65444,0.26577,1.85137,0.37051,0.30627,0,0,0,0,0.45835,0.05224,0,0,0,0,1.95784,0.68442,0,0,0.46775,0.46235,0,0,0,0.66879,0.24915,0,0,0.67409,0,0,0,0,0,0.16826,0,0,0,0,4.36604,3.65546,0,0.70808,0.92704,0,0,0.79427,0,0,0.32633,1.03643,0,0,2.26944,0,0,0,0.67183,0,0.48646,1.45627,0.41296,0,0.18897,0,0,0.8549,0,0.33408,0.53642,0,0,0.63986,0.49393,0,0,0.46142,0,0,0,0.36489,0,0.37085,0.18448,0,0,0,1.69861,0,0,0,1.88457,0,0,2.23312,0.44915,1.54134,0.28418,0,0,0,3.5589,0.00034,0,0.30898,0,0,0,0,0,0,0,0,0,1.52864,0,2.4371,2.90161,0,0,0.09196,0,0,0,2.72136,2.1584,0,0.24534,3.12657,0,0.73631,0.41622,0.43252,3.22055,0.71864,0,0,0.19091,0,0,0,0,3.37778,0,0,0.39621,0,0.74312,0,0.32181,0,0,0,2.11153,0,0.5271,0,0,0,0,0,0.66892,0,1.14697,0,0.80246,2.40683,0,1.83546,0,0,0.00301,0,0.29553,0.4235,2.09133,0,0,0,0,0.84048,0,1.04658,0.73957,2.00601,3.00712,0.40309,0,0.03658,0.77215,0,0,0,0,0.77068,0,1.00322,0.98077,0,0,3.03688,0.13598,1.74532,0,0,0,0,0,0,0.17142,0.86985,0,0,0.46677,0,0.74177,0,0,0.07682,0,0.14155,0,2.45545,0,0,1.13945,0,0,0.87291,0,0,0,1.04163,0.81498,0,3.5985,0,0,0,0,1.46532,0.36748,1.62928,0,0,2.34511,0,0,2.23166,1.0924,0,0,1.51005,0,0,0.15954,0.92975,0,0,0,1.11671,0,0,2.11359,0,0,0,0,0.32786,1.89113,0.3948,0,2.25035,0,0.03137,0,0,0,0,0.14958,0.56114,0,0.23005,0,0,0.56362,0,0.31515,1.56577,0,1.28204,1.97987,0,0.34627,2.95546,1.20468,0,0,0.70529,3.77792,0.83431,0.30329,0,1.78767,0,0,0,0.41707,1.75734,0,0,0.21594,0.11235,0,0,1.7622,0.11631,0.99561,3.95393,0,1.25187,0,0.09957,0,0,0.25556,0,0.19899,0.78673,0,0,0,0.46072,0,0,0.23935,0.51579,1.96154,0.17213,1.13218,0,0,2.64706,0,0,0.19867,0.19443,1.23011,0.42263,0,1.36751,0,1.47292,0,0,1.1933,0.81987,0.67279,0,0,0,0,0.31321,0.38792,0,0.97478,0,2.09358,0,0,0,0,0.59704,0.52108,0,0,0,0,0,0,1.49573,0,1.28203,0,0.15861,1.77033,0,0,0,0.44058,0,0,0.8216,0,0,0,1.
28078,3.00126,0.75924,1.75638,0,0.68387,0.23909,0,0,0,0.53911,0,0.55787,0,0,0,0.31723,0,0.15849,0.55158,0,3.48702,0.52798,0,0,1.34605,0,2.78598,0.72608,0.15528,0.37953,0,0,1.30241,0,0,0,2.0312,0,0,0,0.52765,0.69557,0,1.31956,0.44269,0,0,0,0.19423,0.34963,0.66904,0,0.32123,0,1.55632,0.30644,0,0.75316,0.22337,0,0,0,1.70708,0,0.32568,0,2.83094,1.02536,0.68783,0.90727,0.53032,0,0.08307,2.98607,0.13533,0.95933,1.19553,0.15561,0.8755,0,0,0,0,0,0,0,0,0,0,0,0.5449,0,0,0,0,0,0.94743,0,0,1.28715,0,0.23818,1.12691,0,0,0.42063,0,0,0.52871,0.44003,0,0.33548,1.64644,0.18746,0,0.14237,0,0,0,0.11335,0,0.43312,0,0,1.87878,0,0,0.4728,0,0.36483,0.18241,0,0.02107,0,0.47507,0,0.30273,0,0.20793,0,0.29547,0,0,0.62152,0,0,0.35194,3.12088,0,0,2.19738,2.50726,1.1937,0,0,0,0.78965,0,2.92522,0,1.52847,0,0,0.2925,0.37387,0.18928,0,0,0.31143,0,0.081,0,0,0.18183,0,1.11033,0,0.17409,1.31933,0,0.53793,0,0.4661,0.25081,0,0.90499,2.23212,0.70002,0,0,0.12527,0,0,0,0.1894,0,0.82974,0.72874,0,1.15173,0.54735,0.14194,4.34797,0.57146,0,0,0,0.78339,0.46051,0,1.81524,2.45431,0,0,0,3.47303,0,1.7397,0,0,2.95898,0.33029,0.78115,0.06317,0,0.07935,0,0,0,1.38397,0,0,0,0,0.72088,4.46334,0,0,0.23081,0,1.25068,0,0.1824,0,1.21439,2.369,0,0,0.37996,5.12023,0.90615,0,0,0,0,0.43819,0,0.75562,0,0.84137,0,0.4518,0,0,2.88127,0.70104,0,0,4.75537,0,0,0,0,0.13274,0.58304,0,1.03032,0.19526,0,0.38559,1.68267,1.44561,0,0.49244,3.70069,0,0.33095,0]) true_class.append(100.0) test_vec.append([0.66374,0,1.09448,1.717,0.30954,0,0,0.73869,0,0,0,1.61676,0,0,0,0.51643,0,0,1.53844,0.63427,0,1.6026,0.43855,0,0,0,1.04236,0,0,1.43585,0,0,2.89057,0,1.65257,0,0,0,0,0,0.73248,0,0,0,0,0.25792,0.80655,1.1381,0,0,0,0.09845,2.55367,0,1.60068,0,0,0.01422,0,0,0,0.22858,0,1.12436,0,1.30998,2.20225,0,0,0,0,1.15713,0.9265,0,0,0,0,0,0,0,1.65698,0.99414,0,0,0,1.4259,0,0.97503,1.391,0,0,0.76531,0.69529,0,0,0,0,0,0,0,0,0,0,0,0.53737,0,0.25857,0,3.92585,1.53477,0,0,0,0,1.43263,0,2.79462,0.14359,0,0,0,4.4834,0,0,0,0,0,0,0.72255,1.35445,1.46559,0.52822,0.98034,1.82141,0,0,0,0,0,0,0,0,0,0,1.8597,2.37535,0.58132,0,2.32378,0,0,0,0,1.25587,1.88393,0,0,0.54643,0,0,0,0,0,0.50549,0,0,0,0.88133,0.26529,0,0,2.61249,0,0,0,0.28246,0,0,0,2.80048,0,1.8278,0,0,0.83172,0,0,1.21923,0,3.36218,0,2.3542,0.96971,0,0,0,1.54589,0.40725,3.93392,0,0,2.81675,0,0.3898,0,0,0,0,0,0,0,0,0.77211,0,0,0,0,0,0,0.65722,3.16444,0,0,0,0.1705,0,0,0,1.93914,0,3.84408,0,0,0,0.04207,2.27927,1.01503,0,0.51049,0.99761,0,0,0,0,0,0,1.22748,0,1.01792,0,0,0.49426,0,0,0,0,0,0,0,1.24647,0.16684,0,1.51527,0,1.74098,0,0,0,0,0,0,0,0.35721,1.07772,0,0,0,0.93269,0,0.68836,1.54824,0,0,0.907,0,0,0,0,0,0,1.96235,0,0,0.74423,0.26638,0,0,0,0,1.53479,0,0,1.72449,0,0,0,0,0,0,0.67467,0,0.17656,0,0,0,0,0,0,0,0,1.49924,0.88658,0,0.05181,0,1.62675,0,0,0.40106,0,1.43525,0,0,0,0.56202,0,0,0.80513,0,0,0,1.8131,0,1.11759,0,0,0,0,1.32496,0,0,0,0,0,2.0905,0.9224,0,0,0.1981,0.69267,0,0,0,1.02005,1.91671,0.76289,0,1.24847,0,0,0,0,0,0.33998,0,0,0,0,3.75779,3.39876,0,0.77607,1.53696,0,0,1.10539,0,0,0,1.16448,0,0,2.80329,0,0,0,0.85152,0,1.12538,1.28457,1.17446,0,0.39848,0,0,0.70778,0,0.76308,0,0.71419,0,0.79506,0,0,0,0.15374,0,0,0,0.62653,0,0.74721,0.4845,0,0,0,1.34758,0.30017,0,0,1.44436,0,0.92803,1.85258,0,1.93054,0,0,0,0,3.04512,0,0,0,0,0,0,0,0,0,0,0,0,0.64898,0,2.04486,2.49268,0,0,0.83435,0,0,0,3.03945,1.81588,0.59258,0,3.72477,0,0.7872,0,0.00478,2.65265,0.16402,0,0,1.05906,0,0,0,0,4.21306,0,0,0.60746,0,0,0,0,0,1.05785,0,2.43445,0,0.4712,0,0,0,1.02944,0,0.12719,0,0.34327,0.10337,0.6413,2.29918,0,1.98337,0,0,1.2344,0,0,0.35195,2.46061,0,0.07174,0,0,0.062
4,0,0.39571,0,2.52472,2.97087,0.24403,0,0,0.00059,0,0,0,0,0.45488,0,1.25671,1.22597,0,0,2.53733,0.25571,1.80606,0,0,0.61512,0,0,0.51823,0.98615,1.13317,0,0,1.36894,0,1.47854,0,0,0.17759,0,0.74312,0,2.33086,0,0,1.38421,0,0.31738,0.05453,0,0,0,0,1.33853,0,4.7626,0.65644,0,0,0,1.23831,1.22663,2.47141,0,0,1.78359,0,0,1.75879,0.80632,0,0,0.42046,0,0,0.26832,0.16474,0,0,0.128,0.98604,0,0,2.01123,0,0,0,0,0,1.50423,1.03771,0,2.15917,0,0.72776,0,0,0,0,1.10143,0.26385,0,0,0,0,0.26364,0,0,2.20408,0,0.79942,2.10013,0,0.44043,2.29232,1.04198,0,0,1.64816,3.17147,1.14608,0.25116,0,0.95239,0,0,0,0,2.00406,0,0.30178,0.92323,0.98771,0,0,0.56034,1.15678,0.89539,4.83602,0,1.0931,0.09767,0.21844,0,0,1.76452,0,0.17914,0,0,0,0,1.62488,0,0,0,0.77664,2.09106,0,1.60118,0,0,2.29174,0,0,0,0.56709,0.94975,0,0,1.47388,0,1.22663,0,0,0.98856,0.35063,0,0,0,0,0,0,1.26911,0,0.30533,0,0.64052,0,0,0,0,0.13419,0,0.46349,0,0,0,0,0,1.36746,0,2.57244,0,0.39204,2.1318,0,0,0,0.44787,0,0,0.17328,0,0,0,0.89555,2.85584,0,1.92577,0,1.03426,0.18141,0,0.02415,0,0,0,0.18818,0,0,0,0.65959,0,1.03499,0.46914,0,3.60509,1.20667,0,0,1.03361,0,2.59794,0,0,0.64221,0,0,0.40343,0,0,0,2.59824,0,0,0,0,0.3536,0,1.22901,0.45871,0,0,0,0.78015,0,0,0,0,0,2.57121,1.32876,0,0,0.3991,0,0,0,1.73541,0,0.59699,0,3.22155,1.30842,0,1.30642,0,0,0.35536,1.77889,0,0.05381,0.8363,0,0.63344,0,0,0,0,0,0,0,0,0,0,0,1.83222,0,0,0,0,0.11816,0.08875,0,0,1.15345,0,0.99687,0.95276,0,0,0.05418,0,0,0,1.64066,0,1.42887,2.70256,1.33271,0,1.39725,0,0,0,0.56957,0,0,0,0,1.88547,0,0,0.32236,0,0,0.30165,0,0.27562,0,0,0,0,0,0.21437,0,0.10263,0,0,0,0,0,0.89189,4.09716,0,0,1.70826,3.18284,1.92224,0,0,0,0.63796,0,2.98539,0,1.67413,0,0,0,1.1747,1.38018,0,0,0.70105,0,0,0,0,0,0,0.1987,0,1.44536,1.21354,0,0.81194,0,0.65285,0,0,1.216,1.5916,0.00447,0,0,0,0,0,0,0,0,0,0,0,1.1216,0.119,0.60053,5.39575,0,0,0,0.34936,1.01728,1.49665,0,2.04303,2.32411,0,0,0,4.03519,0,1.76222,0,0,2.3014,1.62886,1.18102,0.47804,0,0.30708,0,0,0,1.2992,0,0,0,0,1.11595,5.31706,0,0,0.42023,1.32127,1.63448,0,0.97557,0.05671,1.368,2.301,0,0,1.29569,4.84056,0.58183,0,0,0,0,2.41762,0,0.66331,0,0,0,0.58113,0,0,2.01817,0,0,0,4.43796,0,0,0,0,0,0,0,1.35968,1.39548,0,0.74168,2.41537,1.70999,0,0.74709,4.16181,0,1.77807,0]) true_class.append(200.0) 
test_vec.append([0.0,0.0,1.02378,0.86537,0.52096,0.0,0.0,0.86569,0.0,0.0,1.25239,0.0,0.0,0.10275,0.13453,0.14261,0.0,0.03815,1.45566,0.66246,0.0,0.14194,0.77532,0.0,0.0,0.0,0.0,0.0,0.85025,0.0,0.0,0.0,1.71965,0.0,1.12771,0.0,0.0,0.0,0.0,0.0,2.21248,0.0,0.0,0.0,1.02037,0.0,1.83393,0.0,0.0,0.0,0.0,0.0,3.22355,0.0,0.57061,0.0,0.61325,0.0,0.0,0.0,0.0,1.37463,0.0,0.0,0.31545,0.02344,1.8816,0.0,0.0,0.0,0.79182,1.63796,0.23462,0.0,0.0367,0.0,0.0,0.0,0.0,0.0,1.49467,0.53617,0.0,0.0,0.0,2.63275,0.15545,0.0,0.14912,0.0,0.05899,0.51951,0.08216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4772,0.0,0.0,0.0,0.07795,0.0,0.0,0.0,3.14931,1.51006,0.35246,0.0,0.0,0.0,1.25366,0.0,2.49412,0.01714,0.0,0.0,0.0,3.87597,0.0,0.0,0.0,0.0,0.0,0.0,0.227,0.36734,0.66069,0.00453,0.0,2.28759,0.0,0.0,0.0,0.0,0.0,0.0,0.48053,0.0,0.0,0.10111,1.86308,1.92005,0.80752,0.0,0.9832,0.01579,0.0,0.0,0.0,0.60559,0.56333,0.0,0.0,0.4936,0.0,0.0,0.95457,0.0,0.0,0.0,0.0,0.0,0.0,0.52915,0.0,0.0,0.0,2.60508,0.75095,0.0,1.36717,0.65725,0.0,0.0,0.0,2.06952,0.0,1.63788,0.0,0.0,1.96992,0.0,0.0,1.42637,0.0,2.98227,0.0,2.31867,0.00732,0.0,0.0,0.0,0.15545,1.00066,4.4553,0.0,0.0,3.22233,0.0,0.04355,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.4462,0.05998,0.0,0.0,0.0,0.0,0.0,1.89134,2.39107,0.0,0.0,0.0,0.22906,0.92307,0.0,0.0,1.45769,0.0,3.38134,0.0,0.0,0.0,0.54563,2.92672,0.03028,0.0,0.0,0.52902,0.0,0.0,0.0,0.0,1.23814,0.0,0.0,0.15778,0.22876,0.0,0.0,0.38084,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.27341,0.43933,0.0,0.0,0.0,2.29396,0.0,0.0,1.31989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.22661,0.37916,0.0,0.0,0.0,0.0,0.0,0.0,1.17301,0.0,0.0,0.0,0.0,0.0,0.28877,0.0,0.0,1.38802,0.0,0.45544,2.08168,0.0,0.0,0.0,0.0,0.0,0.0,0.95298,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.81061,0.05561,0.0,0.0,0.68155,0.0,2.03513,0.0,0.0,0.0,0.0,1.49072,0.0,0.0,0.49542,0.0,0.0,0.96604,0.0,0.0,0.83591,0.3401,1.84218,0.58514,0.0,0.0,0.0,0.0,0.0,0.17452,0.0,0.0,0.0,0.0,0.0,1.80195,0.43078,0.12558,0.0,0.6141,0.21727,0.0,0.0,0.0,0.38526,0.0,0.0,0.0,0.51386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.35328,3.54682,0.0,0.64922,0.6055,0.0,0.0,0.65163,0.0,0.0,0.55294,1.03124,0.0,0.0,2.1915,0.0,0.0,0.0,0.50909,0.0,0.0,1.39711,0.0,0.0,0.05591,0.0,0.0,1.00389,0.0,0.0,0.8889,0.0,0.0,0.75612,0.58052,0.0,0.0,0.5278,0.0,0.0,0.0,0.1389,0.0,0.2428,0.0,0.0,0.0,0.0,1.8734,0.0,0.0,0.0,1.78977,0.0,0.0,2.2345,0.70865,1.14073,0.35422,0.0,0.0,0.0,3.66343,0.0,0.0,0.4082,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.87945,0.0,2.49299,3.00523,0.0,0.0,0.01184,0.0,0.01448,0.0,2.63768,2.13688,0.0,0.62724,2.99925,0.0,0.70589,0.81796,0.71982,3.14888,0.89087,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.26729,0.0,0.0,0.4211,0.04467,0.98575,0.0,0.51702,0.0,0.0,0.0,2.09895,0.0,0.66031,0.0,0.0,0.0,0.0,0.0,0.84436,0.0,1.44374,0.01572,1.12008,2.37219,0.0,1.79202,0.0,0.0,0.0,0.0,0.44119,0.62798,2.03361,0.0,0.0,0.0,0.0,1.03021,0.0,1.4258,1.22678,1.8237,2.87319,0.56243,0.0,0.28223,1.06628,0.0,0.0,0.0,0.0,0.89911,0.0,0.98212,0.65271,0.0,0.0,3.05927,0.09557,1.73109,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45386,0.22503,0.0,0.0,0.0,0.32894,0.0,0.07809,0.0,0.0,0.0,0.0,2.46268,0.0,0.0,1.07242,0.0,0.0,1.02452,0.0,0.0,0.0,1.36057,0.36657,0.0,3.37368,0.0,0.0,0.0,0.0,1.42513,0.08506,1.41931,0.0,0.0,2.42289,0.0,0.0,2.26081,1.08687,0.0,0.0,1.92282,0.0,0.0,0.34711,0.92995,0.0,0.0,0.0,1.00544,0.0,0.0,1.84394,0.0,0.0,0.0,0.0,0.54121,1.87131,0.0,0.0,1.90903,0.0,0.00192,0.0,0.0,0.0,0.0,0.0,0.79236,0.0,0.28244,0.0,0.0,0.38696,0.0,0.52426,1.24895,0.0,1.43088,2.01694,0.0,0.25583,2.92475,1.31583,0.0,0.0,0.41913,3.73163,0.67996,0.00378,0.0,2.05286,0.0,0.0,0.0,0.85315,1.5056,0.04749,0.0249
1,0.08462,0.0,0.0,0.0,2.09839,0.0,0.88255,3.86387,0.0,1.24396,0.0,0.0,0.0,0.0,0.0,0.0,0.45548,1.01171,0.0,0.0,0.0,0.06008,0.0,0.0,0.32583,0.11706,1.73475,0.46403,0.88229,0.0,0.0,2.65293,0.0,0.0,0.14577,0.16059,1.15921,0.76016,0.0,1.13632,0.0,1.51864,0.0,0.0,1.12836,0.91533,1.01396,0.0,0.0,0.0,0.0,0.72904,0.0,0.0,1.30529,0.0,2.56936,0.0,0.0,0.0,0.0,0.6739,0.66264,0.0,0.0,0.0,0.0,0.0,0.0,1.56187,0.0,0.61619,0.0,0.0,1.54011,0.0,0.0,0.0,0.50495,0.0,0.0,1.09793,0.0,0.0,0.0,1.586,2.60731,0.89228,1.74828,0.0,0.80611,0.26768,0.0,0.0,0.0,0.97478,0.0,0.82535,0.0,0.0,0.0,0.0,0.0,0.0,0.59424,0.0,3.40208,0.29945,0.0,0.0,1.40285,0.0,2.53214,0.89456,0.35867,0.01142,0.0,0.0,1.72754,0.0,0.0,0.0,1.96116,0.0,0.0,0.0,0.67893,0.93432,0.0,1.17659,0.34302,0.0,0.0,0.0,0.08902,0.79611,0.91685,0.0,0.4134,0.0,1.26203,0.0,0.0,1.0672,0.0,0.0,0.0,0.0,1.87626,0.0,0.07163,0.0,2.77396,0.8359,1.01072,0.69999,0.67118,0.0,0.02703,3.21478,0.17149,1.29662,1.4282,0.22523,0.88462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15945,0.0,0.0,0.0,1.20521,0.0,0.0,1.14212,0.0,0.0,1.06764,0.0,0.0,0.48324,0.0,0.0,0.93048,0.0,0.0,0.04668,1.24215,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.63506,0.0,0.0,2.0197,0.0,0.0,0.64068,0.0,0.67773,0.07286,0.0,0.07325,0.0,0.61848,0.0,0.51642,0.0,0.13415,0.0,0.24762,0.0,0.0,1.138,0.0,0.05095,0.09744,2.72067,0.0,0.0,2.17862,2.24626,0.78026,0.0,0.0,0.0,0.91128,0.0,2.93209,0.0,1.68081,0.0,0.0,0.36658,0.02664,0.0,0.0,0.0,0.08065,0.0,0.02219,0.0,0.0,0.15345,0.0,1.36435,0.0,0.0,1.28141,0.0,0.13168,0.07221,0.21724,0.59902,0.0,0.64866,2.38968,0.85281,0.0,0.0,0.36304,0.0,0.0,0.0,0.08364,0.0,1.41225,1.02908,0.0,1.03806,0.7982,0.0,3.80226,0.85739,0.0,0.0,0.0,0.87857,0.16096,0.0,1.59718,2.50409,0.0,0.0,0.0,3.12388,0.0,1.52823,0.0,0.0,3.08214,0.0,0.74487,0.2059,0.0,0.07696,0.0,0.0,0.0,1.26256,0.0,0.0,0.0,0.0,0.45868,4.24342,0.0,0.0,0.00141,0.0,1.17591,0.0,0.0,0.0,0.99489,2.00839,0.0,0.0,0.0,5.00877,1.04824,0.0,0.0,0.0,0.0,0.0,0.0,0.82028,0.0,1.21763,0.0,0.14246,0.0,0.0,3.0032,0.90946,0.0,0.0,4.64447,0.0,0.0,0.0,0.0,0.26529,0.94394,0.0,0.99551,0.0,0.0,0.183,1.53081,1.37277,0.0,0.22932,3.72266,0.0,0.0,0.0]) true_class.append(300.0) 
test_vec.append([0.29979,0,0.73698,0.99874,0.17627,0,0.00099,0.36331,0,0,0,0.78206,0,0,0.00081,0.31148,0,0,0.88571,0.3403,0,0.90578,0.16888,0,0,0,0.60339,0,0,0.79334,0,0,1.57426,0,0.87785,0,0,0,0,0,0.5187,0,0,0.04702,0,0.07962,0.48724,0.64936,0,0,0,0.06892,1.45448,0,0.85457,0,0,0.03112,0,0,0,0.18512,0,0.57216,0,0.79528,1.2721,0,0,0,0,0.6194,0.582,0,0,0,0,0,0,0,0.95408,0.53562,0,0,0,0.80414,0,0.49622,0.78993,0,0,0.43597,0.44195,0,0,0,0,0,0,0,0,0,0,0,0.30615,0,0.07983,0,2.20337,0.87327,0,0,0,0,0.85689,0,1.58373,0.04835,0,0,0,2.41131,0,0,0,0,0,0,0.46916,0.71141,0.76534,0.241,0.52692,1.13465,0,0,0,0,0,0,0,0,0,0,1.14936,1.35962,0.29899,0,1.26081,0,0,0,0,0.65909,0.99672,0,0,0.28275,0,0,0,0,0,0.20353,0,0,0,0.49067,0.13259,0,0,1.56771,0,0,0,0.09744,0,0,0,1.55713,0,0.99034,0,0,0.50887,0,0,0.65799,0,1.84948,0,1.34226,0.58093,0,0,0,0.8835,0.17832,2.20285,0,0,1.66286,0,0.21995,0,0,0,0,0,0,0,0,0.40228,0,0,0,0,0,0,0.372,1.73578,0,0,0,0.11036,0,0,0,1.0769,0,2.15672,0,0,0,0.0082,1.322,0.53862,0,0.30636,0.63966,0,0,0,0,0,0,0.69544,0,0.53921,0,0,0.29731,0,0,0,0,0,0,0,0.75803,0.07249,0,0.80661,0,0.9905,0,0,0,0,0,0,0,0.26491,0.66857,0,0,0,0.53896,0,0.41213,0.78647,0,0.00852,0.41189,0,0,0,0,0,0,1.06587,0,0,0.4446,0.18852,0,0,0,0,0.95407,0,0.00726,0.95692,0,0,0,0,0,0,0.38208,0,0.11971,0,0,0,0,0,0,0,0,0.8405,0.47058,0,0.09299,0,0.86145,0,0,0.20589,0,0.84098,0,0,0,0.24209,0,0,0.47782,0,0,0,1.03085,0,0.64727,0,0,0,0,0.70496,0.0213,0,0,0,0,1.20441,0.57091,0,0,0.11086,0.43955,0,0,0,0.59741,1.00779,0.28216,0,0.66311,0,0,0,0,0,0.22516,0,0,0,0,2.19657,1.97806,0,0.43672,0.87214,0,0,0.62402,0,0,0,0.64836,0,0,1.51232,0,0,0,0.51461,0,0.7417,0.72421,0.67749,0,0.24982,0,0,0.36151,0,0.49732,0,0.31498,0,0.38781,0,0,0,0.09475,0,0,0,0.38615,0,0.40264,0.29188,0,0,0,0.7335,0.04352,0,0,0.87097,0,0.4231,1.0869,0,1.14827,0,0,0,0,1.76639,0.02084,0,0,0,0,0,0,0,0,0,0,0,0.37906,0,1.18141,1.41771,0,0,0.37898,0,0,0,1.67243,1.04663,0.27132,0,2.04878,0,0.45091,0,0,1.57672,0.09582,0,0,0.54758,0,0,0,0,2.28798,0,0,0.29653,0,0,0,0,0,0.49615,0,1.30677,0,0.24773,0,0,0,0.47095,0,0.09667,0,0.23898,0,0.28521,1.29733,0,1.11683,0,0,0.57324,0,0,0.135,1.34796,0,0.06995,0,0,0.05933,0,0.20252,0,1.40791,1.67938,0.09137,0,0,0.00323,0,0,0,0,0.26897,0,0.67938,0.75985,0,0,1.45494,0.13991,0.9974,0,0,0.31338,0,0,0.24889,0.51407,0.73345,0,0,0.79609,0,0.88526,0,0,0.17425,0,0.40718,0,1.31727,0,0,0.75527,0,0.12083,0.09926,0,0,0,0,0.83522,0,2.54378,0.3604,0,0,0,0.75086,0.66623,1.33212,0,0,1.04355,0,0,1.03713,0.46766,0,0,0.26269,0,0,0.05659,0.18704,0,0,0.04968,0.6267,0,0,1.22371,0,0,0,0,0,0.89464,0.64762,0,1.32455,0,0.32702,0,0,0,0,0.53211,0.12871,0,0,0,0,0.23914,0,0,1.23011,0,0.4656,1.142,0,0.28341,1.38303,0.57182,0,0,0.88852,1.86646,0.64204,0.23273,0,0.56413,0,0,0,0,1.16332,0,0.08161,0.43951,0.4517,0,0,0.35888,0.59994,0.52817,2.58889,0,0.60941,0.05501,0.15675,0,0,0.90365,0,0.01422,0,0,0,0,0.8736,0,0,0,0.51084,1.21025,0,0.90287,0,0,1.32412,0,0,0.00724,0.26563,0.56966,0,0,0.89074,0,0.71696,0,0,0.59322,0.24448,0,0,0,0,0,0,0.72116,0,0.16385,0,0.41472,0,0,0,0,0.13305,0.04111,0.16913,0,0,0,0,0,0.74869,0,1.45263,0,0.25038,1.21191,0,0,0,0.2036,0,0,0.10333,0,0,0,0.45472,1.73035,0.01098,1.03492,0,0.47335,0.10783,0,0,0,0,0,0.08411,0,0,0,0.44192,0,0.5106,0.2365,0,2.02061,0.64631,0,0,0.60439,0,1.55193,0,0,0.43662,0,0,0.20695,0,0,0,1.3956,0,0,0,0,0.16904,0,0.76331,0.29432,0,0,0,0.39298,0,0,0,0.00712,0,1.39828,0.71762,0,0,0.276,0,0,0,0.92375,0,0.37562,0,1.75298,0.76313,0,0.74957,0,0,0.17594,1.07444,0,0.03816,0.42649,0,0.37136,0,0,0,0,0,0,0,0,0,0,0,1.08903,0,0,0,0,0.03687,0.09573,0,0
,0.67596,0,0.55691,0.56566,0,0,0.05842,0,0,0,0.97777,0,0.74614,1.48355,0.64294,0,0.69026,0,0,0,0.28099,0,0,0,0,1.01275,0,0,0.16191,0,0,0.19777,0,0.11041,0,0,0,0,0,0.14008,0,0.08993,0,0,0,0,0,0.50726,2.26588,0,0,1.03037,1.75822,1.10571,0,0,0,0.34841,0,1.65546,0,0.87362,0,0,0,0.64538,0.73849,0,0,0.4205,0,0.02042,0,0,0,0,0.16426,0,0.76918,0.70498,0,0.53496,0,0.40506,0,0,0.73649,0.94325,0.04285,0,0,0,0,0,0,0.01573,0,0,0,0,0.67038,0.02551,0.37146,3.0708,0,0,0,0.13983,0.51937,0.77669,0,1.17095,1.32168,0,0,0,2.26229,0,1.04377,0,0,1.34784,0.87796,0.61146,0.18542,0,0.15649,0,0,0,0.79716,0,0,0,0,0.64443,2.91423,0,0,0.28661,0.6302,0.87206,0,0.51836,0.00878,0.81573,1.3959,0,0,0.71026,2.77243,0.32368,0,0,0,0,1.26471,0,0.35446,0,0,0,0.39423,0,0,1.21582,0,0,0,2.57459,0,0,0,0,0,0,0,0.74495,0.73003,0,0.42348,1.33552,0.94824,0,0.45369,2.26857,0,0.94495,0]) true_class.append(400.0) prediction, probability = s.predict(test_vec) self.assertEqual(prediction, true_class)
# Naive Bayes (kept for reference)
# multi = MultinomialNB()
# multi = multi.fit(train_mood_array, label)
# joblib.dump(multi, 'model/gnb.model')
# muljob = joblib.load('model/gnb.model')
# result = muljob.predict(test_word_array)
# count = 0
# for i in range(len(test_word_array)):
#     type = result[i]
#     if type != test_word_arrayLabel[i]:
#         count = count + 1
# print("mul", count / float(testCount))
# PosWords, NegWords, prior_Pos = trainingNaiveBayes(train_mood_array, label)
# D = np.ones(len(vocabList))
# DS_temp = D
# predict(PosWords, NegWords, prior_Pos, test_word_array, D, DS_temp)

# SVM
model = SVM(max_iter=10, kernel_type='linear', C=1.0, epsilon=0.001)

# Relabel class 2 as -1 so the problem becomes a {-1, +1} binary task
for i in range(len(label)):
    if label[i] == 2:
        label[i] = -1
for i in range(len(test_word_arrayLabel)):
    if test_word_arrayLabel[i] == 2:
        test_word_arrayLabel[i] = -1

model.fit(np.array(train_mood_array), np.array(label))
y_hat = model.predict(np.array(test_word_array))

# Error rate on the test set
num = 0.0
for i in range(len(y_hat)):
    if y_hat[i] != test_word_arrayLabel[i]:
        num = num + 1
print(num / len(test_word_arrayLabel))
import numpy as np
from SVM import SVM, polynomial_kernel
from SVM_utils import *

X1, y1, X2, y2 = gen_non_lin_separable_data()
X_train, y_train = split_train(X1, y1, X2, y2)
X_test, y_test = split_test(X1, y1, X2, y2)

model = SVM(polynomial_kernel)
model.fit(X_train, y_train)

y_predict = model.predict(X_test)
correct = np.sum(y_predict == y_test)
print("%d out of %d predictions correct" % (correct, len(y_predict)))

plot_contour(X_train[y_train == 1], X_train[y_train == -1], model)
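# SVM_utils is not shown in this section. A minimal sketch of what a data
# generator like gen_non_lin_separable_data could look like; the means and
# covariance below are illustrative assumptions, not the original values:
def gen_non_lin_separable_data(n=100):
    # Two classes drawn from pairs of Gaussian blobs arranged so that
    # no single straight line separates them.
    mean1, mean2, mean3, mean4 = [-1, 2], [1, -1], [4, -4], [-4, 4]
    cov = [[1.0, 0.8], [0.8, 1.0]]
    X1 = np.vstack((np.random.multivariate_normal(mean1, cov, n // 2),
                    np.random.multivariate_normal(mean3, cov, n // 2)))
    y1 = np.ones(len(X1))
    X2 = np.vstack((np.random.multivariate_normal(mean2, cov, n // 2),
                    np.random.multivariate_normal(mean4, cov, n // 2)))
    y2 = -np.ones(len(X2))
    return X1, y1, X2, y2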
    n_informative=5,
    random_state=1111,
    n_classes=2,
    class_sep=1.75,
)
# Map the y labels from {0, 1} to {-1, 1}
y = (y * 2) - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
# Normalize the data to remove scale effects (optional)
# X_train, X_test = normalize(X_train), normalize(X_test)

# Build the SVM classifier
model = SVM(X_train, y_train, kernel="RBF")
model.fit()
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# print(y_pred, y_test)
print("Accuracy on the randomly generated classification data:", accuracy)
print()
print('--------------------------------------------------------------')
print()

# Binary task on iris: samples of classes 1 and 2 get label=1, class-0 samples get label=-1
data = datasets.load_iris()
X = data.data
Y = data.target
temp = []
for label in Y:
    if label == 0:
        temp.append(-1)
      % (result[0], len(gta_profs), result[1], len(viral_profs)))
else:
    # Otherwise classify test set
    # Make sure queries set
    if args.queries == None:
        print("The query file was not specified. Please declare queries using -q.")
    else:
        # All good
        # Load test set
        test_file = args.queries[0]
        test_profs = Loader.load(test_file)
        # Make features
        if args.kmer:
            feats.kmer_feat(test_profs)
        if args.pseaac:
            feats.pseaac(lam=int(args.pseaac), weight=PSE_WEIGHT, profiles=test_profs)
        if args.physico:
            feats.physicochem(profiles=test_profs)
        # Classify
        svm.predict(test_profs)
        # Print results
        if mini:
            print("Gene\t\tClass")
            for profile in test_profs:
                print(">%s\t%s" % (profile.name, profile.label))
        else:
            print("%-*s%-*s%-*s" % (55, "Gene", 10, "Score", 5, "Classification"))
            for profile in test_profs:
                print(">%-*s%-*f%-*s" % (55, profile.org_name, 10, profile.score, 5, profile.label))

end = time.time()
print(end - start)
for cs in cellsize:
    # Extract HOG features for the training and validation sets
    hog_train = hog_extraction(x_train, size=cs)
    hog_val = hog_extraction(x_val, size=cs)
    for lr in learning_rate:
        for rs in regularization_strength:
            svm = SVM()
            # Train
            history_loss = svm.train(hog_train, y_train, reg=rs, learning_rate=lr, num_iters=2000)
            # Predict the validation-set classes
            y_pre = svm.predict(hog_val)
            # Compute validation accuracy
            acc = np.mean(y_pre == y_val)
            # Keep the model with the highest validation accuracy
            if acc > max_acc:
                max_acc = acc
                best_learning_rate = lr
                best_regularization_strength = rs
                best_cellsize = cs
                best_svm = svm
            print("cellsize=%d,learning_rate=%e,regularization_strength=%e,val_accuracy=%f"
                  % (cs, lr, rs, acc))
# Print the best accuracy
import numpy as np
from SVM import SVM


def load_data(file):
    X = []
    y = []
    with open(file) as f:
        for each_line in f.readlines():
            data = each_line.strip().split()
            y.append(float(data[0]))
            x = [0 for i in range(784)]
            for s in data[1:-1]:
                ind, color = map(int, s.split(":"))
                x[ind - 1] = float(color)
            X.append(x)
    return np.array(X), np.array(y)


if __name__ == "__main__":
    X_train, y_train = load_data("train-01-images.svm")
    X_test, y_test = load_data("test-01-images.svm")
    clf = SVM()
    clf.fit(X_train, y_train)
    y_predict = clf.predict(X_test)
    correct = np.sum(y_predict == y_test)
    print("%d out of %d predictions correct (%.2f%%)."
          % (correct, len(y_test), correct / len(y_test) * 100))
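# load_data above assumes a sparse, libsvm-style line format for the 784-pixel
# digit images: a label followed by 1-based "index:value" pairs. A hypothetical
# example line (values made up purely for illustration):
#
#   1 153:82 154:255 155:180 181:64 209:255
#
# i.e. label 1, with pixel 153 set to 82, pixel 154 to 255, and so on; all
# pixels not listed default to 0.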
def predict():
    rootdir = 'E:/python/stockdata/train'
    stock_num = os.path.join(rootdir, str(input("Enter the code of the stock to predict: ")) + '.csv')
    stock_path = stock_num.replace("\\", "/")
    if os.path.isfile(stock_path):
        data = pd.read_csv("%s" % stock_path, header=0)
        acc = SVM(data)[0]
        print("Prediction accuracy for this stock:", acc)

        # Yesterday's raw inputs
        in_close = float(input("Enter yesterday's closing price: "))
        in_ma5 = float(input("Enter yesterday's 5-day moving average: "))
        in_ma10 = float(input("Enter yesterday's 10-day moving average: "))
        in_ma20 = float(input("Enter yesterday's 20-day moving average: "))
        in_volume = float(input("Enter yesterday's trading volume: "))
        v_ma5 = float(input("Enter yesterday's 5-day average volume: "))
        v_ma10 = float(input("Enter yesterday's 10-day average volume: "))
        turnover = float(input("Enter yesterday's turnover: "))
        # v_ma20 = float(input("Enter yesterday's 20-day average volume: "))

        # Derived trend features
        ma5 = (in_close - in_ma5) / in_ma5
        ma10 = (in_close - in_ma10) / in_ma10
        ma20 = (in_close - in_ma20) / in_ma20
        vma5 = (in_volume - v_ma5) / v_ma5
        vma10 = (in_volume - v_ma10) / v_ma10
        # vma20 = in_volume - v_ma20

        # Optionally standardize the stacked features with preprocessing.scale()

        x = np.hstack((ma5, ma10, ma20, vma5, vma10, turnover))
        x = x.reshape(1, 6)

        clf = SVM(data)[1]
        data_x = SVM(data)[2]
        data_y = SVM(data)[3]
        pred = clf.predict(x)  # if standardization is not needed, change to clf.predict(train_x)
        if pred < 0:
            print("Prediction for today: down", pred)
        else:
            print("Prediction for today: up", pred)

        # Visualization
        plot_x = data['date'].values[0:20]
        plot_y = data_y[0:20]
        plt.scatter(data['date'].values[0:20], data_y[0:20], color='darkorange', label='data')
        plt.plot([0, len(plot_x)], [0, 0], '--', color='g')
        plt.plot(plot_x, plot_y, color='navy', lw=2, label='Origin model')
        plt.plot(plot_x, clf.fit(data_x, data_y).predict(data_x)[0:20],
                 color='cornflowerblue', lw=2, label='Linear model')
        plt.xlabel('data')
        plt.ylabel('target')
        plt.title('SVM-Stock-Prediction')
        plt.legend()
        plt.show()
import numpy as np
from SVM import SVM, polynomial_kernel
from SVM_utils import *

X1, y1, X2, y2 = gen_non_lin_separable_data()
X = np.vstack((X1, X2))
y = np.hstack((y1, y2))

model = SVM(polynomial_kernel)
model.fit(X, y)

y_predict = model.predict(X)
correct = np.sum(y_predict == y)
print("%d out of %d predictions correct" % (correct, len(y_predict)))

plot_contour(X[y == 1], X[y == -1], model)
## predict
baseline_predict = np.array(baseline.predict([last_data]))
baseline_predict = codeDenormalize(baseline_predict, code=code)
# print('baseline prediction:', baseline_predict, '\n')

## Baseline2
baseline2_score = scoreCal(test_input, test_target, test_output, variation=[1, 1, 1, 1, 1])
print('baseline2 score:', baseline2_score)

## SVM
svm = SVM()
svm.fit(train_input, train_target)
svm.save(code=code)
# svm.load(code=code)
test_output = svm.predict(test_input)

## denormalize
test_output = codeDenormalize(test_output, code=code)
test_data = denorm_test_input
test_target = denorm_test_target

## score
svm_score = scoreCal(test_input, test_target, test_output)
svm_abs_score = scoreCal(test_input, test_target, test_output, count_variation=False)
print('svm score:', svm_score)
print('svm abs score:', svm_abs_score)

## predict
svm_predict = svm.predict([last_data[-15:]])
svm_predict = codeDenormalize(svm_predict, code=code)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from SVM import SVM

X, y = datasets.make_blobs(n_samples=100, n_features=2, centers=2, cluster_std=1.5, random_state=42)
y = np.where(y == 0, -1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svm = SVM()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)

print("SVM classification accuracy:", accuracy_score(y_test, y_pred))
print("SVM classification f1 score:", f1_score(y_test, y_pred))


def visualize_svm():
    '''
    x_i . w + b = -1 for the negative support vectors (y_i = -1)
    x_i . w + b = +1 for the positive support vectors (y_i = +1)
    decision boundary: x_i . w + b = 0
    '''
    def get_hyperplane_value(x, w, b, offset):
        return (-w[0] * x + b + offset) / w[1]
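    # The rest of visualize_svm is cut off above. A sketch of how
    # get_hyperplane_value might be used to draw the boundary and margins,
    # assuming the fitted model exposes `w` and `b` (plotting details are
    # illustrative, not the original implementation):
    fig, ax = plt.subplots()
    ax.scatter(X[:, 0], X[:, 1], marker='o', c=y)
    x0_min, x0_max = np.amin(X[:, 0]), np.amax(X[:, 0])
    for offset, style in [(0, 'k-'), (-1, 'k--'), (1, 'k--')]:
        # Decision boundary (offset 0) and the two margins (offset ±1)
        y_left = get_hyperplane_value(x0_min, svm.w, svm.b, offset)
        y_right = get_hyperplane_value(x0_max, svm.w, svm.b, offset)
        ax.plot([x0_min, x0_max], [y_left, y_right], style)
    plt.show()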
            c='w', edgecolors='orange', s=150, label='Support Vector')
plt.scatter(positive_xs[:, 0], positive_xs[:, 1], c='#00CED1', s=60, label='Great (positive)')
plt.scatter(negative_xs[:, 0], negative_xs[:, 1], c='#DC143C', s=60, label='Awful (negative)')
preds = Linear_SVM.predict(xs).reshape(x0.shape)
plt.contour(x0, x1, preds, [0.5], linewidths=2, colors='m')
plt.xlabel("x[0]: Density")
plt.ylabel("x[1]: Sugar Content")
plt.title("Outcome of Linear SVM")
plt.legend()

# plot outcome of Gaussian SVM
plt.subplot(1, 2, 2)
plt.scatter(Gaussian_SVM.support_vectors[:, 0], Gaussian_SVM.support_vectors[:, 1], marker='o',
            c='w', edgecolors='orange', s=150, label='Support Vector')
if __name__ == "__main__":
    iris = pd.read_csv('iris - setosa.csv')
    labels = iris.species
    del iris['species']
    labels = labels.to_numpy()
    features = iris.to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.333)

    svm_start = time.time()
    clf = SVM(kernel='linear', C=0.01)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    svm_end = time.time()
    print("SVM", accuracy_score(y_test, pred) * 100)
    print("It took", svm_end - svm_start, "seconds for SVM to train")

    lssvm_start = time.time()
    clf = LSSVM(kernel='linear', C=0.01)
    clf.fit(X_train, y_train)
    lssvm_end = time.time()
    pred_ls = clf.predict(X_test)
    pred_ls_train = clf.predict(X_train)
    print("LS TEST", accuracy_score(y_test, pred_ls) * 100)
    print("LS TRAIN", accuracy_score(y_train, pred_ls_train) * 100)
    print("It took", lssvm_end - lssvm_start, "seconds for LS-SVM to train")

    start_time = time.time()
classifier = SVM(dims=6, reg=0.001)
classifier.train(data=train, seasons=50)
classifier.plot_all(reg="0.001")
classifier.evaluate(validation)

classifier = SVM(dims=6, reg=0.01)
classifier.train(data=train, seasons=50)
classifier.plot_all(reg="0.01")
classifier.evaluate(validation)

# Predict on the held-out test file and write a submission
TestData = genfromtxt('test.txt', delimiter=',', dtype=str)
X = TestData[:, CONTINOUS]
X = X.astype(float)
X = X - np.mean(X, axis=0)
X /= np.std(X, axis=0)
predict_y = classifier.predict(X)

file = open("submission.txt", "w")
for i in predict_y:
    if i == -1:
        file.write("<=50K\n")
    else:
        file.write(">50K\n")
file.close()

classifier = SVM(dims=6, reg=0.1)
classifier.train(data=train, seasons=50)
classifier.plot_all(reg="0.1")
classifier.evaluate(validation)

classifier = SVM(dims=6, reg=1)
# plt.ylabel('loss')
# plt.show()

start = time.perf_counter()

# Tune hyper-parameters using the validation set
learning_rate = [7e-6, 1e-7, 3e-7]
regularization_strength = [1e4, 3e4, 5e4, 7e4, 1e5, 3e5, 5e5]
max_acc = -1.0
for lr in learning_rate:
    for rs in regularization_strength:
        svm = SVM()
        # Train
        history_loss = svm.train(x_train, y_train, reg=rs, learning_rate=lr, num_iters=2000)
        # Predict the validation-set classes
        y_pre = svm.predict(x_val)
        # Compute validation accuracy
        acc = np.mean(y_pre == y_val)
        # Keep the model with the highest validation accuracy
        if acc > max_acc:
            max_acc = acc
            best_learning_rate = lr
            best_regularization_strength = rs
            best_svm = svm
        print("learning_rate=%e,regularization_strength=%e,val_accuracy=%f" % (lr, rs, acc))
print("max_accuracy=%f,best_learning_rate=%e,best_regularization_strength=%e"
      % (max_acc, best_learning_rate, best_regularization_strength))

end = time.perf_counter()

# Accuracy of the best SVM model on the test set
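# The test-set evaluation is cut off above; a minimal sketch of how the best
# model found by the search might be applied, assuming x_test / y_test exist
# alongside the train and validation splits:
y_test_pre = best_svm.predict(x_test)
test_acc = np.mean(y_test_pre == y_test)
print("test_accuracy=%f" % test_acc)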