# Demo script: fit the SMO classifier on two synthetic 5-D Gaussian clusters
# and print its score on a held-out third of the samples.
from SMO import *
from sklearn.model_selection import train_test_split
import numpy as np

dim = 5
per_class = 100

# Cluster labelled +1: mean -2 in every coordinate, rank-one covariance built
# from a random direction.
direction_a = np.random.normal(size=(dim, ))
cov_a = np.outer(direction_a, direction_a)
cluster_a = np.random.multivariate_normal(np.zeros(dim) - 2,
                                          cov_a,
                                          size=per_class)

# Cluster labelled -1: mean +2 in every coordinate, its own random direction.
direction_b = np.random.normal(size=(dim, ))
cov_b = np.outer(direction_b, direction_b)
cluster_b = np.random.multivariate_normal(np.zeros(dim) + 2,
                                          cov_b,
                                          size=per_class)

# Stack the samples and build the matching label vector.
X_train = np.concatenate([cluster_a, cluster_b], axis=0)
Y_train = np.array([1] * per_class + [-1] * per_class)

# Fixed random_state keeps the train/test partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_train,
                                                    Y_train,
                                                    test_size=0.33,
                                                    random_state=42)

model = SMO()
model.fit(X_train, y_train)
print('score:', model.score(X_test, y_test))
def train(self, C=(0.01, 1, 10, 100), tol=1e-3):
    """Pick the penalty C by 10-fold cross-validation, then fit on all data.

    ``self.X`` / ``self.Y`` are shuffled and cut into ten folds (the last
    fold absorbs the remainder). Each candidate in ``C`` is scored by the
    mean of ``SMO._evaluate`` over the ten held-out folds; the candidate
    with the highest mean is used for a final fit on the full data set.

    When ``self.IK`` is truthy the kernel matrix is multiplied by
    ``p1 - p2`` (the pair returned by ``trans_mat``) before it is handed to
    SMO, and the resulting alphas are multiplied by the same matrix.

    Args:
        C: Iterable of candidate penalty values. It is only read.
        tol: Tolerance forwarded to ``SMO.SMO_Model``.

    Returns:
        None. The fitted state is stored on ``self``: ``X``, ``Y``,
        ``alphas``, ``b`` and ``kernel_dict`` (which is replaced by the
        ``kernel`` attribute of the fitted model).

    Raises:
        ValueError: If ``C`` is empty, if ``self.kernel_dict['type']`` is
            not one of RBF / LINEAR / POLY / TANH / TL1, or if ``self.IK``
            is set for a kernel other than TANH / TL1.
    """
    n_folds = 10
    candidates = list(C)
    if not candidates:
        raise ValueError("C must contain at least one candidate value")

    # Captured up front because self.kernel_dict is overwritten at the end.
    params = self.kernel_dict
    kind = params['type']
    indefinite = self.IK
    if kind not in ('RBF', 'LINEAR', 'POLY', 'TANH', 'TL1'):
        raise ValueError("unsupported kernel type: %r" % (kind, ))
    if indefinite and kind not in ('TANH', 'TL1'):
        raise ValueError(
            "IK is only supported for TANH and TL1 kernels, got %r" %
            (kind, ))

    def build_kernel(data):
        # Build the kernel for `data`; returns (kernel, flip) where flip is
        # the p1 - p2 matrix in IK mode and None otherwise.
        n_rows = data.shape[0]
        if kind == 'RBF':
            kernel = Kernel.RBF(n_rows, params['gamma'])
        elif kind == 'LINEAR':
            kernel = Kernel.LINEAR(n_rows)
        elif kind == 'POLY':
            kernel = Kernel.POLY(n_rows, params['c'], params['d'])
        elif kind == 'TANH':
            kernel = Kernel.TANH(n_rows, params['c'], params['d'])
        else:  # 'TL1' (validated above)
            kernel = Kernel.TL1(n_rows, params['rho'])
        kernel.calculate(data)
        if not indefinite:
            return kernel, None
        p1, p2 = trans_mat(kernel.kernelMat)
        flip = p1 - p2
        # The transformed matrix must be the one SMO sees, so the kernel is
        # not rebuilt after this point.
        kernel.kernelMat = np.dot(flip, kernel.kernelMat)
        return kernel, flip

    def fit(data, labels, c_val):
        # Run SMO on (data, labels) with penalty c_val and return the model.
        kernel, flip = build_kernel(data)
        fitted = SMO.SMO(
            SMO.SMO_Model(data, labels, c_val, kernel, tol=tol, eps=1e-3))
        if flip is not None:
            fitted.alphas = np.dot(flip, fitted.alphas)
        return fitted

    # Shuffle once, then slice into folds; shape[0] is the number of
    # training samples and self.Y holds the labels.
    order = np.random.permutation(self.X.shape[0])
    shuffled_x = self.X[order]
    shuffled_y = self.Y[order]
    fold_len = len(order) // n_folds
    bounds = [k * fold_len for k in range(n_folds)] + [len(order)]
    folds_x = [shuffled_x[bounds[k]:bounds[k + 1]] for k in range(n_folds)]
    folds_y = [shuffled_y[bounds[k]:bounds[k + 1]] for k in range(n_folds)]

    best_c = None
    best_acc = None
    for c_val in candidates:
        # Start from zero for every candidate so scores are comparable.
        mean_acc = 0.0
        for held_out in range(n_folds):
            # Remaining folds, in rotated order starting after the
            # held-out fold.
            rest = [(held_out + k) % n_folds for k in range(1, n_folds)]
            train_x = np.concatenate([folds_x[j] for j in rest], axis=0)
            train_y = np.concatenate([folds_y[j] for j in rest], axis=0)
            fold_model = fit(train_x, train_y, c_val)
            acc = SMO._evaluate(fold_model, folds_x[held_out],
                                folds_y[held_out])
            mean_acc += acc / n_folds
        # The first candidate always becomes the baseline, so best_c is
        # never None when the final fit runs.
        if best_c is None or mean_acc > best_acc:
            best_acc = mean_acc
            best_c = c_val

    # Final pass: refit on the whole data set with the selected C and copy
    # the fitted parameters onto this object.
    final_model = fit(self.X, self.Y, best_c)
    self.X = final_model.X
    self.Y = final_model.y
    # NOTE(review): this replaces the configuration dict with the fitted
    # model's kernel, so a second train() call on the same object would no
    # longer find params['type'] -- confirm this is intended.
    self.kernel_dict = final_model.kernel
    self.alphas = final_model.alphas
    self.b = final_model.b
    return None
i.pop(0) tmp = [] for j in range(784): tmp.append(0) for j in i: plz = j.find(':') tmp[int(j[0:plz])] = int(j[plz + 1:]) * r dataset.append(tmp) dataset = np.array(dataset) label = np.array(label) # ytmp = label.reshape(-1, 1) * 1. # xtmp = ytmp * dataset alpha, x, y, b = SMO(dataset, label, 10) b = np.float(b) temp = alpha * y temp = np.transpose(temp) omega = np.dot(temp, x) # omega = np.transpose(omega) datafile = open('test-01-images.svm', mode='r') tedata = datafile.readlines() datafile.close() testdata = [] for i in tedata: testdata.append(i.replace('\n', '').split(' ', i.count(' '))) dataset = [] label = []