def _computeError(self, X, Y, name=' (unnamed)'):
    """Train a predictor on (X, Y), record its training error, optionally
    plot the decision boundary, and return the training error.

    X    -- feature matrix (passed straight to the trainer/plotter)
    Y    -- labels
    name -- unused; kept for interface compatibility with callers
    Returns self.tErr (also stored on the instance).
    """
    ## make a predictor and get training error
    self.predictor, self.tErr = self._trainPredictError(X, Y)
    # plot training results
    if self.plot:
        # _generateTitle() returns a %-format string; LR runs fill
        # (error-as-string, lambda), SVM runs fill (error, primal/dual, C).
        suffix = self._generateTitle() % ((str(self.tErr), self.params['lamduh'])
                                          if self.problemClass.lower() == 'lr'
                                          else (self.tErr, 'primal' if self.params['primal'] else 'dual', self.params['C']))
        title = self.problemClass + " Train" + suffix
        if self.problemClass.lower() == 'svm':
            if not self.params['primal']:
                # Dual SVM titles also name the kernel in use.
                title += ", " + self.params['kernelName'] + " kernel"
        plotDecisionBoundary(X, Y, self.predictor, [-1, 0, 1], title = title, meshsize=self.meshSize)
    return self.tErr
def _computeTVError(self):
    """Train on the '<dataSetName>train' file, evaluate on the matching
    'validate' file, optionally plot both decision boundaries, and return
    the errors plus the geometric margin.

    Returns (tErr, vErr, gm) for LR runs, (tErr, vErr, gm, sv) for primal
    SVM runs.  NOTE(review): a dual SVM run falls through both branches
    and implicitly returns None -- confirm that is intended.
    """
    ## load data
    train = numpy.loadtxt(self.datapath %(self.dataSetName,'train'))
    self.tX = train[:, 0:2].copy()  # first two columns are features
    #self.tPhi = makePhi(self.tX,self.M)
    #self.n,self.m = self.tPhi.shape
    self.tY = train[:, 2:3].copy()  # third column holds the labels
    ## make a predictor and get training error
    self.predictor, self.tErr = self._trainPredictError(self.tX, self.tY)
    # plot training results
    if self.plot:
        # _generateTitle() is a %-format string; LR fills (error-as-string,
        # lambda), SVM fills (error, primal/dual, C).
        suffix = self._generateTitle() % ((str(self.tErr), self.params['lamduh']) if self.problemClass.lower() == 'lr' else (self.tErr, 'primal' if self.params['primal'] else 'dual',self.params['C']))
        title = self.problemClass + " Train" + suffix
        if self.problemClass.lower() == 'svm':
            if not self.params['primal']:
                title += ", " + self.params['kernelName'] + " kernel"
        plotDecisionBoundary(self.tX, self.tY, self.predictor, [-1, 0, 1], title = title, meshsize = self.meshSize)
    ## load validation data
    validate = numpy.loadtxt(self.datapath %(self.dataSetName,'validate'))
    self.vX = validate[:, 0:2].copy()
    self.vY = validate[:, 2:3].copy()  ## actually a width of 1 for this data
    # print validation error
    self.vErr = self._getError(self.vX, self.vY, self.predictor)
    # plot validation results
    if self.plot:
        suffix = self._generateTitle() % ((str(self.vErr), self.params['lamduh']) if self.problemClass.lower() == 'lr' else (self.vErr, 'primal' if self.params['primal'] else 'dual',self.params['C']))
        title = self.problemClass + " Validate" + suffix
        if self.problemClass.lower() == 'svm':
            if not self.params['primal']:
                title += ", " + self.params['kernelName'] + " kernel"
        plotDecisionBoundary(self.vX, self.vY, self.predictor, [-1, 0, 1], title = title, meshsize = self.meshSize)
    ## compute the geometric margin
    gm = 1.0 / numpy.linalg.norm(self.w)
    if self.problemClass.lower() == 'lr':
        return self.tErr, self.vErr, gm
    elif self.params['primal']:
        ## calculate the number of support vectors
        self.sv = self.numSupport(self.slack)
        return self.tErr, self.vErr, gm, self.sv
def _computeError(self, X, Y, name=' (unnamed)'):
    """Fit a predictor on (X, Y), cache it and its training error on the
    instance, optionally draw the decision boundary, and return the error.

    The ``name`` parameter is unused; it is retained so existing callers
    keep working.
    """
    # Fit and record the training error.
    self.predictor, self.tErr = self._trainPredictError(X, Y)
    if self.plot:
        kind = self.problemClass.lower()
        # _generateTitle() yields a %-format string whose fields differ
        # between logistic-regression and SVM runs.
        if kind == 'lr':
            fields = (str(self.tErr), self.params['lamduh'])
        else:
            mode = 'primal' if self.params['primal'] else 'dual'
            fields = (self.tErr, mode, self.params['C'])
        title = self.problemClass + " Train" + (self._generateTitle() % fields)
        if kind == 'svm' and not self.params['primal']:
            # Dual SVM titles also carry the kernel name.
            title += ", " + self.params['kernelName'] + " kernel"
        plotDecisionBoundary(X, Y, self.predictor, [-1, 0, 1],
                             title=title, meshsize=self.meshSize)
    return self.tErr
def plotData(self):
    """Scatter-plot the 2-D data, overlay support-vector markers, and draw
    the decision boundary (from the linear weights when available,
    otherwise via plotDecisionBoundary with the dual predictor)."""
    if self.titanicData:
        # Titanic features are not 2-D, so there is nothing to scatter.
        print "can't plot the titanic data, it's high dimensional"
        return
    idx_pos = np.where(self.y > 0)
    idx_neg = np.where(self.y < 0)
    plt.scatter(self.x[idx_pos,0], self.x[idx_pos,1], color='b', marker='o', facecolors='none', label=' = +1')
    plt.scatter(self.x[idx_neg,0], self.x[idx_neg,1], color='r', marker='o', facecolors='none', label=' = -1')
    # intersect idx_pos and supportVectorsIdx
    # NOTE(review): "...supportVecs" is intersected with the *Strict* index
    # set while "...supportVecsStrict" takes the inside-margin set -- the
    # names look swapped; confirm against the fitting code.
    idx_pos_supportVecs = np.intersect1d(idx_pos,self.supportVectorsStrictIdx)
    idx_pos_supportVecsStrict = np.intersect1d(idx_pos,self.supportVectorsInsideMarginIdx)
    plt.scatter(self.x[idx_pos_supportVecs,0], self.x[idx_pos_supportVecs,1], color='b', marker='x', s=200, facecolors='none')
    plt.scatter(self.x[idx_pos_supportVecsStrict,0], self.x[idx_pos_supportVecsStrict,1], color='b', marker='v', s=200, facecolors='none')
    idx_neg_supportVecs = np.intersect1d(idx_neg,self.supportVectorsStrictIdx)
    idx_neg_supportVecsStrict = np.intersect1d(idx_neg,self.supportVectorsInsideMarginIdx)
    plt.scatter(self.x[idx_neg_supportVecs,0], self.x[idx_neg_supportVecs,1], color='r', marker='x', s=200, facecolors='none')
    plt.scatter(self.x[idx_neg_supportVecsStrict,0], self.x[idx_neg_supportVecsStrict,1], color='r', marker='v', s=200, facecolors='none')
    if (self.theta is not None) and (self.kernel_type == 'linear'):
        # Assemble the full weight vector [b, theta] and draw the line
        # w0 + w1*x1 + w2*x2 = 0 across the data's x1 range.
        w_full = np.zeros((self.d+1,1))[:,0]
        w_full[0] = self.b
        w_full[1:] = self.theta
        x_1_grid = np.linspace(np.min(self.x[:,0]),np.max(self.x[:,0]), 100)
        x_2_grid = -1.0/w_full[2]*(w_full[0] + w_full[1]*x_1_grid)
        plt.plot(x_1_grid, x_2_grid, color='g', label=' = bdry')
    elif self.a is not None:
        # Kernelized model: trace the zero level set of the dual predictor.
        plotDecisionBoundary(self.x, self.y, self.predictorFunction, 0, title = "")
    plt.xlabel(r'$x_1$')
    plt.ylabel(r'$x_2$')
    plt.legend(loc='best')
    plt.show()
    # NOTE(review): plt.show() is called twice; the second call is redundant.
    plt.show()
import numpy as np
import pylab as pl
from cvxopt import matrix
from quadSVM import QuadSVM
from sklearn.svm import SVC
from cvxopt.solvers import qp
from cvxopt.solvers import options
from plotBoundary import plotDecisionBoundary

# Silence cvxopt's per-iteration solver output.
options['show_progress'] = False

# Tiny linearly separable toy problem: two positives, two negatives.
x = np.array([(2, 2), (2, 3), (0, -1), (-3, -2)])
y = np.array([[1.0], [1.0], [-1.0], [-1.0]])
C = 1000.0  # large C -> nearly hard-margin behaviour

# Fit the hand-rolled QP-based SVM and plot its boundary/margins.
svm = QuadSVM(C=C)
svm.fit(x, y.flatten())
print("a", svm.predict(x))
plotDecisionBoundary(x, y, svm.predictOne, [-1, 0, 1], title='quadSVM')

# Fit scikit-learn's linear SVC on the same data for comparison.
clf = SVC(C=C, kernel='linear')
clf.fit(x, y.flatten())

def predictOne(x_i):
    # Adapter: plotDecisionBoundary expects a single-sample predictor.
    return clf.decision_function(np.array([x_i]))

print("b", clf.decision_function(x))
plotDecisionBoundary(x, y, predictOne, [-1, 0, 1], title='sklearnSVM')
pl.show()
def _computeTVError(self):
    """Train on the '<dataSetName>train' data file, evaluate on the
    matching 'validate' file, optionally plot both decision boundaries,
    and return the errors together with the geometric margin.

    Returns (tErr, vErr, gm) for LR runs and (tErr, vErr, gm, sv) for
    primal SVM runs; a dual SVM run returns None, as in the original.
    """

    def _load(split):
        # Each data file holds two feature columns followed by a label column.
        data = numpy.loadtxt(self.datapath % (self.dataSetName, split))
        return data[:, 0:2].copy(), data[:, 2:3].copy()

    def _titled(err, phase):
        # _generateTitle() is a %-format string; LR and SVM runs fill
        # different fields.
        if self.problemClass.lower() == 'lr':
            fields = (str(err), self.params['lamduh'])
        else:
            fields = (err, 'primal' if self.params['primal'] else 'dual',
                      self.params['C'])
        text = self.problemClass + phase + (self._generateTitle() % fields)
        if self.problemClass.lower() == 'svm' and not self.params['primal']:
            text += ", " + self.params['kernelName'] + " kernel"
        return text

    # Training phase.
    self.tX, self.tY = _load('train')
    #self.tPhi = makePhi(self.tX,self.M)
    #self.n,self.m = self.tPhi.shape
    self.predictor, self.tErr = self._trainPredictError(self.tX, self.tY)
    if self.plot:
        plotDecisionBoundary(self.tX, self.tY, self.predictor, [-1, 0, 1],
                             title=_titled(self.tErr, " Train"),
                             meshsize=self.meshSize)

    # Validation phase (labels are a single column here as well).
    self.vX, self.vY = _load('validate')
    self.vErr = self._getError(self.vX, self.vY, self.predictor)
    if self.plot:
        plotDecisionBoundary(self.vX, self.vY, self.predictor, [-1, 0, 1],
                             title=_titled(self.vErr, " Validate"),
                             meshsize=self.meshSize)

    # Geometric margin of the learned separator.
    gm = 1.0 / numpy.linalg.norm(self.w)
    if self.problemClass.lower() == 'lr':
        return self.tErr, self.vErr, gm
    elif self.params['primal']:
        # Count the support vectors from the slack variables.
        self.sv = self.numSupport(self.slack)
        return self.tErr, self.vErr, gm, self.sv
Ytrain, Xval, Yval, L=4, M=[5, 10, 3])
# NOTE(review): the line above is the tail of an NN_train(...) call whose
# opening (and the assignment of weights/offsets/acc/num_iters) lies
# outside this chunk.
print 'Finished training in ', num_iters, ' rounds with a validation accuracy of ', acc
print 'Performance on test set: ', classify_accuracy( Xtest, Ytest, weights, offsets)

def predictNN(x):
    # Map the network's one-hot output vector back to a class index.
    y_vector = NN_predict(x, weights, offsets)
    index = np.nonzero(y_vector)
    return index[0][0]

# plot validation results
plot.plotDecisionBoundary(X, Y, predictNN, [-1, 0, 1], title='NN toy set')
pl.show()

#### TEST ON HW2 DATA SETS ####
hw2_data = False  # flip to True to also run on the HW2 CSV data
if hw2_data:
    # parameters
    name = '2'
    print '======Training======'
    # load data from csv files
    train = np.loadtxt('data/data' + name + '_train.csv')
    Xtrain = train[:, 0:2]
    Ytrain_values = train[:, 2:3].astype(int)
    # Map -1/+1 labels to 0/1 before one-hot encoding.
    Ytrain_values[Ytrain_values < 0] = 0
    Ytrain = one_hot(Ytrain_values.reshape(1, -1)[0], 2)
    val = np.loadtxt('data/data' + name + '_validate.csv')
# NOTE(review): this chunk begins mid-method -- the loop below is the tail
# of a predict(...) implementation whose opening (including the loop over
# samples that binds i and x_i) is outside this chunk; original nesting
# depth is not recoverable here.
for j, sv_x_i in enumerate(self.sv_x):
    # Kernel expansion: accumulate a_j * y_j * K(x_i, sv_j) for sample i.
    y[i] += self.sv_a[j] * self.sv_y[j] * self._kernel(x_i, sv_x_i)
return y + self.bias

def predictOne(self, x):
    # Convenience wrapper: score a single sample through predict().
    return self.predict(np.array([x]))

# Toy data: the inline comments label the expected support vectors.
x = np.array([
    [2.0, 2.0],    # SV1
    [2.0, 3.0],    # SV2
    [-2.0, 0.0],   # SV3
    [-4.0, -3.0],  # SV4
    [3.0, 1.0],    # SV4
    [0.0, -1.0],   # irrelevant
    [-3.0, -2.0]   # irrelevant
])
y = np.array([[1.0], [1.0], [1.0], [1.0], [-1.0], [-1.0], [-1.0]])
L = .00001  # regularizer; C = 1/L below
svm = quadSVM(C=1.0 / L, kernel=make_polynomial_kernel(3))
svm.fit(x, y)
print(svm.predict(x))
plotDecisionBoundary(x, y, svm.predictOne, [0], title='quadSVM')
print("\n")
# Compare against sklearn's default (RBF) SVC on the same data.
clf = SVC()
clf.fit(x, y)
print(clf.decision_function(x))
pl.show()
# NOTE(review): X, Y, Xtrain, Ytrain and the helpers (one_hot, NN_train,
# NN_predict, classify_accuracy) are defined outside this chunk.
# Split rows 400-599 for validation and 600+ for test; labels are
# one-hot encoded over 3 classes.
Xval = X[400:600,:]
Yval = one_hot(Y[400:600,:].reshape(1,-1)[0],3)
Xtest = X[600:,:]
Ytest = one_hot(Y[600:,:].reshape(1,-1)[0],3)
print 'Training...'
weights, offsets , acc, num_iters = NN_train(Xtrain, Ytrain, Xval, Yval, L=4,M=[5,10,3])
print 'Finished training in ', num_iters, ' rounds with a validation accuracy of ', acc
print 'Performance on test set: ', classify_accuracy(Xtest, Ytest, weights, offsets)

def predictNN(x):
    # Map the network's one-hot output vector back to a class index.
    y_vector = NN_predict(x, weights, offsets)
    index = np.nonzero(y_vector)
    return index[0][0]

# plot validation results
plot.plotDecisionBoundary(X, Y, predictNN, [-1,0,1], title = 'NN toy set')
pl.show()

#### TEST ON HW2 DATA SETS ####
hw2_data = True
if hw2_data:
    # parameters
    name = '4'
    print '====== HW2 DATA SET ======'
    # load data from csv files
    train = np.loadtxt('data/data'+name+'_train.csv')
    Xtrain = train[:, 0:2]
    Ytrain_values=train[:, 2:3].astype(int)
    # Map -1/+1 labels to 0/1 before one-hot encoding.
    Ytrain_values[Ytrain_values < 0] = 0
    Ytrain = one_hot(Ytrain_values.reshape(1,-1)[0],2)
    val = np.loadtxt('data/data'+name+'_validate.csv')
def plotData(self): if self.titanicData: print "can't plot the titanic data, it's high dimensional" return idx_pos = np.where(self.y > 0) idx_neg = np.where(self.y < 0) plt.scatter(self.x[idx_pos, 0], self.x[idx_pos, 1], color='b', marker='o', facecolors='none', label=' = +1') plt.scatter(self.x[idx_neg, 0], self.x[idx_neg, 1], color='r', marker='o', facecolors='none', label=' = -1') # intersect idx_pos and supportVectorsIdx idx_pos_supportVecs = np.intersect1d(idx_pos, self.supportVectorsStrictIdx) idx_pos_supportVecsStrict = np.intersect1d( idx_pos, self.supportVectorsInsideMarginIdx) plt.scatter(self.x[idx_pos_supportVecs, 0], self.x[idx_pos_supportVecs, 1], color='b', marker='x', s=200, facecolors='none') plt.scatter(self.x[idx_pos_supportVecsStrict, 0], self.x[idx_pos_supportVecsStrict, 1], color='b', marker='v', s=200, facecolors='none') idx_neg_supportVecs = np.intersect1d(idx_neg, self.supportVectorsStrictIdx) idx_neg_supportVecsStrict = np.intersect1d( idx_neg, self.supportVectorsInsideMarginIdx) plt.scatter(self.x[idx_neg_supportVecs, 0], self.x[idx_neg_supportVecs, 1], color='r', marker='x', s=200, facecolors='none') plt.scatter(self.x[idx_neg_supportVecsStrict, 0], self.x[idx_neg_supportVecsStrict, 1], color='r', marker='v', s=200, facecolors='none') if (self.theta is not None) and (self.kernel_type == 'linear'): w_full = np.zeros((self.d + 1, 1))[:, 0] w_full[0] = self.b w_full[1:] = self.theta x_1_grid = np.linspace(np.min(self.x[:, 0]), np.max(self.x[:, 0]), 100) x_2_grid = -1.0 / w_full[2] * (w_full[0] + w_full[1] * x_1_grid) plt.plot(x_1_grid, x_2_grid, color='g', label=' = bdry') elif self.a is not None: plotDecisionBoundary(self.x, self.y, self.predictorFunction, 0, title="") plt.xlabel(r'$x_1$') plt.ylabel(r'$x_2$') plt.legend(loc='best') plt.show() plt.show()
C=10**20)
# NOTE(review): the line above closes a linear_model.LogisticRegression(...)
# call (presumably the L1 regressor, with C huge -> effectively
# unregularized) whose opening lies outside this chunk.
L2_logistic_regressor = linear_model.LogisticRegression(penalty='l2', tol=0.001, C=1)
L2_logistic_regressor.fit(X, Y)
L1_logistic_regressor.fit(X, Y)
# Only the L1 regressor's predictor is actually plotted below.
predictor = create_Logistic_predictor(L1_logistic_regressor)
##Define the predictLR(x) function, which uses trained parameters
# def predictLR(x, regressor = L2_logistic_regressor):
#     return regressor.predict
# plot training results
plotDecisionBoundary(X, Y, predictor, [0.5], title='LR Train')
pl.show()
# print '======Validation======'
# # load data from csv files
# validate = loadtxt('../data/data'+name+'_validate.csv')
# X_v = validate[:,0:2]
# Y_v = validate[:,2:3]
# # plot validation results
# plotDecisionBoundary(X_v, Y_v, predictor, [0.5], title = 'LR Validate')
# pl.show()
# print '======Test======'
# # load data from csv files
# test = loadtxt('../data/data'+name+'_test.csv')
# Which of the HW data sets to load.
dataset_id = "1"
train = np.loadtxt('data/data' + dataset_id + '_train.csv')
x_train, y_train = train[:, 0:2], train[:, 2:3]
test = np.loadtxt('data/data' + dataset_id + '_test.csv')
x_test, y_test = test[:, 0:2], test[:, 2:3]
val = np.loadtxt('data/data' + dataset_id + '_validate.csv')
x_val, y_val = val[:, 0:2], val[:, 2:3]

# Sweep lambda over 2e-1 .. 2e-10 (log scale).
x_axis = [2 * 10**-i for i in range(1, 11)]
y_axis = []
for L in x_axis:
    svm = pLSVM(L=L)
    # fit() returns the quantity plotted below (labelled geometric margin
    # on the final figure).
    y_axis += [svm.fit(x_train, y_train)]
    print("pLSVM train", 1.0 - svm.score(x_train, y_train))
    print("pLSVM val", 1.0 - svm.score(x_val, y_val))
    print("pLSVM test", 1.0 - svm.score(x_test, y_test))
    print("")

# NOTE(review): the collapsed source does not show whether this boundary
# plot sat inside the loop or after it; placed after, so it shows the
# smallest-lambda model -- confirm against the original layout.
plotDecisionBoundary(x_train, y_train, svm.predictOne, [-1, 0, 1], title='pLSVM')

# Margin-vs-lambda summary plot.
plt.figure()
plt.plot(x_axis, y_axis)
plt.xlabel(r"$\lambda$")
plt.ylabel('geometric margin')
plt.xlim(0, 0.2)
plt.tight_layout()
plt.show()
# NOTE(review): `model`, `x`, `y`, and `data` are defined outside this
# chunk; this is the tail of a Keras script.
# Final layer: 3-way softmax classifier.
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(x, y)
print(y)
print(model.predict(x))

def predictOne(x_i):
    # Probability assigned to class 0 for a single sample.
    output = model.predict(np.array([x_i]))
    return output[0, 0]

plotDecisionBoundary(x, data[:, 2], predictOne, [0.5])

def predictOne(x_i):
    # Probability assigned to class 1 (rebinds the name used above).
    output = model.predict(np.array([x_i]))
    return output[0, 1]

plotDecisionBoundary(x, data[:, 2], predictOne, [0.5])

def predictOne(x_i):
    # Probability assigned to class 2.
    output = model.predict(np.array([x_i]))
    return output[0, 2]