Exemple #1
0
    def _computeError(self, X, Y, name=' (unnamed)'):
        """Fit a predictor on (X, Y), cache it, and return the training error.

        When self.plot is set, also draws the decision boundary with a title
        describing the problem class and its hyper-parameters.
        The 'name' argument is currently unused.
        """
        # Train and cache the predictor together with its training error.
        self.predictor, self.tErr = self._trainPredictError(X, Y)

        if not self.plot:
            return self.tErr

        # The title template's fields differ between logistic regression
        # (error, lambda) and SVM (error, primal/dual, C).
        problem = self.problemClass.lower()
        if problem == 'lr':
            fields = (str(self.tErr), self.params['lamduh'])
        else:
            mode = 'primal' if self.params['primal'] else 'dual'
            fields = (self.tErr, mode, self.params['C'])
        title = self.problemClass + " Train" + (self._generateTitle() % fields)
        if problem == 'svm' and not self.params['primal']:
            title += ", " + self.params['kernelName'] + " kernel"
        plotDecisionBoundary(X, Y, self.predictor, [-1, 0, 1],
                             title=title, meshsize=self.meshSize)
        return self.tErr
Exemple #2
0
    def _computeTVError(self):
        """Train on the '<dataSetName>train' split, evaluate on the
        'validate' split, and return training/validation errors plus the
        geometric margin (and, for SVMs, the support-vector count).
        """
        ## load data
        train = numpy.loadtxt(self.datapath %(self.dataSetName,'train'))
        # First two columns are the 2-D inputs, third column the labels.
        self.tX = train[:, 0:2].copy()
        #self.tPhi = makePhi(self.tX,self.M)
        #self.n,self.m = self.tPhi.shape
        self.tY = train[:, 2:3].copy()

        ## make a predictor and get training error
        self.predictor, self.tErr = self._trainPredictError(self.tX, self.tY)

        # plot training results
        if self.plot:
            # Title fields differ between LR (error, lambda) and
            # SVM (error, primal/dual, C).
            suffix = self._generateTitle() % ((str(self.tErr), self.params['lamduh']) if self.problemClass.lower() == 'lr' else (self.tErr, 'primal' if self.params['primal'] else 'dual',self.params['C']))
            title = self.problemClass + " Train" + suffix
            if self.problemClass.lower() == 'svm':
                if not self.params['primal']:
                    title += ", " + self.params['kernelName'] + " kernel"
            plotDecisionBoundary(self.tX, self.tY, self.predictor, [-1, 0, 1], title = title, meshsize = self.meshSize)

        ## load validation data
        validate = numpy.loadtxt(self.datapath %(self.dataSetName,'validate'))
        self.vX = validate[:, 0:2].copy()
        self.vY = validate[:, 2:3].copy() ## actually a width of 1 for this data

        # print validation error
        self.vErr = self._getError(self.vX, self.vY, self.predictor)

        # plot validation results
        if self.plot:
            suffix = self._generateTitle() % ((str(self.vErr), self.params['lamduh']) if self.problemClass.lower() == 'lr' else (self.vErr, 'primal' if self.params['primal'] else 'dual',self.params['C']))
            title = self.problemClass + " Validate" + suffix
            if self.problemClass.lower() == 'svm':
                if not self.params['primal']:
                    title += ", " + self.params['kernelName'] + " kernel"
            plotDecisionBoundary(self.vX, self.vY, self.predictor, [-1, 0, 1], title = title, meshsize = self.meshSize)

        ## compute the geometric margin
        # Geometric margin of the learned hyperplane: 1 / ||w||.
        gm = 1.0 / numpy.linalg.norm(self.w)
        if self.problemClass.lower() == 'lr':
            return self.tErr, self.vErr, gm
        elif self.params['primal']:
            ## calculate the number of support vectors
            self.sv = self.numSupport(self.slack)
        # NOTE(review): for a dual SVM, self.sv is not set in this method;
        # it must be set elsewhere (e.g. during training) or the return
        # below raises AttributeError - verify.
        return self.tErr, self.vErr, gm, self.sv
Exemple #3
0
    def _computeError(self, X, Y, name=' (unnamed)'):
        """Train a predictor on (X, Y), optionally plot its decision
        boundary, and return the training error.

        The 'name' argument is currently unused.
        """
        ## make a predictor and get training error
        self.predictor, self.tErr = self._trainPredictError(X, Y)

        # plot training results
        if self.plot:
            # Title fields differ between LR (error, lambda) and
            # SVM (error, primal/dual, C).
            suffix = self._generateTitle() % (
                (str(self.tErr), self.params['lamduh'])
                if self.problemClass.lower() == 'lr' else
                (self.tErr, 'primal' if self.params['primal'] else 'dual',
                 self.params['C']))
            title = self.problemClass + " Train" + suffix
            if self.problemClass.lower() == 'svm':
                if not self.params['primal']:
                    title += ", " + self.params['kernelName'] + " kernel"
            plotDecisionBoundary(X,
                                 Y,
                                 self.predictor, [-1, 0, 1],
                                 title=title,
                                 meshsize=self.meshSize)
        return self.tErr
    def plotData(self):
        if self.titanicData:
            print "can't plot the titanic data, it's high dimensional"
            return

        idx_pos = np.where(self.y > 0)
        idx_neg = np.where(self.y < 0)
        plt.scatter(self.x[idx_pos,0], self.x[idx_pos,1], color='b', marker='o', facecolors='none', label=' = +1')
        plt.scatter(self.x[idx_neg,0], self.x[idx_neg,1], color='r', marker='o', facecolors='none', label=' = -1')

        # intersect idx_pos and supportVectorsIdx
        idx_pos_supportVecs = np.intersect1d(idx_pos,self.supportVectorsStrictIdx)
        idx_pos_supportVecsStrict = np.intersect1d(idx_pos,self.supportVectorsInsideMarginIdx)
        plt.scatter(self.x[idx_pos_supportVecs,0], self.x[idx_pos_supportVecs,1], color='b', marker='x', s=200, facecolors='none')
        plt.scatter(self.x[idx_pos_supportVecsStrict,0], self.x[idx_pos_supportVecsStrict,1], color='b', marker='v', s=200, facecolors='none')

        idx_neg_supportVecs = np.intersect1d(idx_neg,self.supportVectorsStrictIdx)
        idx_neg_supportVecsStrict = np.intersect1d(idx_neg,self.supportVectorsInsideMarginIdx)
        plt.scatter(self.x[idx_neg_supportVecs,0], self.x[idx_neg_supportVecs,1], color='r', marker='x', s=200, facecolors='none')
        plt.scatter(self.x[idx_neg_supportVecsStrict,0], self.x[idx_neg_supportVecsStrict,1], color='r', marker='v', s=200, facecolors='none')

        if (self.theta is not None) and (self.kernel_type == 'linear'):
            w_full = np.zeros((self.d+1,1))[:,0]
            w_full[0] = self.b
            w_full[1:] = self.theta
            x_1_grid = np.linspace(np.min(self.x[:,0]),np.max(self.x[:,0]), 100)
            x_2_grid = -1.0/w_full[2]*(w_full[0] + w_full[1]*x_1_grid)
            plt.plot(x_1_grid, x_2_grid, color='g', label=' = bdry')

        elif self.a is not None:
            plotDecisionBoundary(self.x, self.y, self.predictorFunction, 0, title = "")

        plt.xlabel(r'$x_1$')
        plt.ylabel(r'$x_2$')
        plt.legend(loc='best')
        plt.show()

        plt.show()
Exemple #5
0
import numpy as np
import pylab as pl
from cvxopt import matrix
from quadSVM import QuadSVM
from sklearn.svm import SVC
from cvxopt.solvers import qp
from cvxopt.solvers import options
from plotBoundary import plotDecisionBoundary
# Suppress cvxopt's per-iteration solver output.
options['show_progress'] = False

# Tiny linearly separable toy problem: two positive, two negative points.
x = np.array([(2, 2), (2, 3), (0, -1), (-3, -2)])
y = np.array([[1.0], [1.0], [-1.0], [-1.0]])
C = 1000.0  # large C: nearly hard-margin SVM
svm = QuadSVM(C=C)
svm.fit(x, y.flatten())
print("a", svm.predict(x))
plotDecisionBoundary(x, y, svm.predictOne, [-1, 0, 1], title='quadSVM')

# Reference solution: scikit-learn's SVC with the same C, for comparison.
clf = SVC(C=C, kernel='linear')
clf.fit(x, y.flatten())


def predictOne(x_i):
    # Adapter: plotDecisionBoundary expects a single-sample predictor.
    return clf.decision_function(np.array([x_i]))


print("b", clf.decision_function(x))
plotDecisionBoundary(x, y, predictOne, [-1, 0, 1], title='sklearnSVM')
pl.show()
Exemple #6
0
    def _computeTVError(self):
        """Train on the '<dataSetName>train' split, evaluate on the
        'validate' split, and return training/validation errors plus the
        geometric margin (and, for SVMs, the support-vector count).
        """
        ## load data
        train = numpy.loadtxt(self.datapath % (self.dataSetName, 'train'))
        # First two columns are the 2-D inputs, third column the labels.
        self.tX = train[:, 0:2].copy()
        #self.tPhi = makePhi(self.tX,self.M)
        #self.n,self.m = self.tPhi.shape
        self.tY = train[:, 2:3].copy()

        ## make a predictor and get training error
        self.predictor, self.tErr = self._trainPredictError(self.tX, self.tY)

        # plot training results
        if self.plot:
            # Title fields differ between LR (error, lambda) and
            # SVM (error, primal/dual, C).
            suffix = self._generateTitle() % (
                (str(self.tErr), self.params['lamduh'])
                if self.problemClass.lower() == 'lr' else
                (self.tErr, 'primal' if self.params['primal'] else 'dual',
                 self.params['C']))
            title = self.problemClass + " Train" + suffix
            if self.problemClass.lower() == 'svm':
                if not self.params['primal']:
                    title += ", " + self.params['kernelName'] + " kernel"
            plotDecisionBoundary(self.tX,
                                 self.tY,
                                 self.predictor, [-1, 0, 1],
                                 title=title,
                                 meshsize=self.meshSize)

        ## load validation data
        validate = numpy.loadtxt(self.datapath %
                                 (self.dataSetName, 'validate'))
        self.vX = validate[:, 0:2].copy()
        self.vY = validate[:,
                           2:3].copy()  ## actually a width of 1 for this data

        # print validation error
        self.vErr = self._getError(self.vX, self.vY, self.predictor)

        # plot validation results
        if self.plot:
            suffix = self._generateTitle() % (
                (str(self.vErr), self.params['lamduh'])
                if self.problemClass.lower() == 'lr' else
                (self.vErr, 'primal' if self.params['primal'] else 'dual',
                 self.params['C']))
            title = self.problemClass + " Validate" + suffix
            if self.problemClass.lower() == 'svm':
                if not self.params['primal']:
                    title += ", " + self.params['kernelName'] + " kernel"
            plotDecisionBoundary(self.vX,
                                 self.vY,
                                 self.predictor, [-1, 0, 1],
                                 title=title,
                                 meshsize=self.meshSize)

        ## compute the geometric margin
        # Geometric margin of the learned hyperplane: 1 / ||w||.
        gm = 1.0 / numpy.linalg.norm(self.w)
        if self.problemClass.lower() == 'lr':
            return self.tErr, self.vErr, gm
        elif self.params['primal']:
            ## calculate the number of support vectors
            self.sv = self.numSupport(self.slack)
        # NOTE(review): for a dual SVM, self.sv is not set in this method;
        # it must be set elsewhere (e.g. during training) or the return
        # below raises AttributeError - verify.
        return self.tErr, self.vErr, gm, self.sv
Exemple #7
0
                                                Ytrain,
                                                Xval,
                                                Yval,
                                                L=4,
                                                M=[5, 10, 3])
    print 'Finished training in ', num_iters, ' rounds with a validation accuracy of ', acc
    print 'Performance on test set: ', classify_accuracy(
        Xtest, Ytest, weights, offsets)

    def predictNN(x):
        y_vector = NN_predict(x, weights, offsets)
        index = np.nonzero(y_vector)
        return index[0][0]

    # plot validation results
    plot.plotDecisionBoundary(X, Y, predictNN, [-1, 0, 1], title='NN toy set')
    pl.show()

#### TEST ON HW2 DATA SETS ####
# Toggle: the HW2 experiment below is disabled by default.
hw2_data = False
if hw2_data:
    # parameters
    name = '2'
    print '======Training======'
    # load data from csv files
    train = np.loadtxt('data/data' + name + '_train.csv')
    Xtrain = train[:, 0:2]
    # Labels arrive as {-1, +1}; remap negatives to 0 for one-hot encoding.
    Ytrain_values = train[:, 2:3].astype(int)
    Ytrain_values[Ytrain_values < 0] = 0
    Ytrain = one_hot(Ytrain_values.reshape(1, -1)[0], 2)
    val = np.loadtxt('data/data' + name + '_validate.csv')
Exemple #8
0
            for j, sv_x_i in enumerate(self.sv_x):
                y[i] += self.sv_a[j] * self.sv_y[j] * self._kernel(x_i, sv_x_i)
        return y + self.bias

    def predictOne(self, x):
        """Predict for a single sample by delegating to the batch predict."""
        sample = np.array([x])
        return self.predict(sample)


x = np.array([
    [2.0, 2.0],  # SV1
    [2.0, 3.0],  # SV2
    [-2.0, 0.0],  # SV3
    [-4.0, -3.0],  # SV4
    [3.0, 1.0],  # SV5 (was mislabelled SV4)
    [0.0, -1.0],  # irrelevant
    [-3.0, -2.0]  # irrelevant
])
y = np.array([[1.0], [1.0], [1.0], [1.0], [-1.0], [-1.0], [-1.0]])
L = .00001  # regularisation weight; C = 1/L gives a near-hard margin
svm = quadSVM(C=1.0 / L, kernel=make_polynomial_kernel(3))
svm.fit(x, y)
print(svm.predict(x))
plotDecisionBoundary(x, y, svm.predictOne, [0], title='quadSVM')

print("\n")

# Reference: scikit-learn's SVC (default settings) on the same data.
# NOTE(review): clf.fit receives y as an (n, 1) column vector; sklearn
# expects a 1-D label array - confirm this is intended.
clf = SVC()
clf.fit(x, y)
print(clf.decision_function(x))

pl.show()
Exemple #9
0
    # Hold-out split: rows 400-599 for validation, rows 600+ for test.
    Xval = X[400:600,:]
    Yval = one_hot(Y[400:600,:].reshape(1,-1)[0],3)
    
    Xtest = X[600:,:]
    Ytest = one_hot(Y[600:,:].reshape(1,-1)[0],3)
    print 'Training...'
    # L=4 layers with sizes M=[5, 10, 3].
    weights, offsets , acc, num_iters = NN_train(Xtrain, Ytrain, Xval, Yval, L=4,M=[5,10,3])   
    print 'Finished training in ', num_iters, ' rounds with a validation accuracy of ', acc
    print 'Performance on test set: ', classify_accuracy(Xtest, Ytest, weights, offsets)
    
    def predictNN(x):
        # Return the index of the class the network's one-hot output selects.
        y_vector = NN_predict(x, weights, offsets)
        index = np.nonzero(y_vector)
        return index[0][0]
    # plot validation results
    plot.plotDecisionBoundary(X, Y, predictNN, [-1,0,1], title = 'NN toy set')
    pl.show()
    
#### TEST ON HW2 DATA SETS ####
# Toggle: run the HW2 data-set experiment below.
hw2_data = True
if hw2_data:
    # parameters
    name = '4'
    print '====== HW2 DATA SET ======'
    # load data from csv files
    train = np.loadtxt('data/data'+name+'_train.csv')
    Xtrain = train[:, 0:2]
    # Labels arrive as {-1, +1}; remap negatives to 0 for one-hot encoding.
    Ytrain_values=train[:, 2:3].astype(int)
    Ytrain_values[Ytrain_values < 0] = 0
    Ytrain = one_hot(Ytrain_values.reshape(1,-1)[0],2)
    val = np.loadtxt('data/data'+name+'_validate.csv')
    def plotData(self):
        if self.titanicData:
            print "can't plot the titanic data, it's high dimensional"
            return

        idx_pos = np.where(self.y > 0)
        idx_neg = np.where(self.y < 0)
        plt.scatter(self.x[idx_pos, 0],
                    self.x[idx_pos, 1],
                    color='b',
                    marker='o',
                    facecolors='none',
                    label=' = +1')
        plt.scatter(self.x[idx_neg, 0],
                    self.x[idx_neg, 1],
                    color='r',
                    marker='o',
                    facecolors='none',
                    label=' = -1')

        # intersect idx_pos and supportVectorsIdx
        idx_pos_supportVecs = np.intersect1d(idx_pos,
                                             self.supportVectorsStrictIdx)
        idx_pos_supportVecsStrict = np.intersect1d(
            idx_pos, self.supportVectorsInsideMarginIdx)
        plt.scatter(self.x[idx_pos_supportVecs, 0],
                    self.x[idx_pos_supportVecs, 1],
                    color='b',
                    marker='x',
                    s=200,
                    facecolors='none')
        plt.scatter(self.x[idx_pos_supportVecsStrict, 0],
                    self.x[idx_pos_supportVecsStrict, 1],
                    color='b',
                    marker='v',
                    s=200,
                    facecolors='none')

        idx_neg_supportVecs = np.intersect1d(idx_neg,
                                             self.supportVectorsStrictIdx)
        idx_neg_supportVecsStrict = np.intersect1d(
            idx_neg, self.supportVectorsInsideMarginIdx)
        plt.scatter(self.x[idx_neg_supportVecs, 0],
                    self.x[idx_neg_supportVecs, 1],
                    color='r',
                    marker='x',
                    s=200,
                    facecolors='none')
        plt.scatter(self.x[idx_neg_supportVecsStrict, 0],
                    self.x[idx_neg_supportVecsStrict, 1],
                    color='r',
                    marker='v',
                    s=200,
                    facecolors='none')

        if (self.theta is not None) and (self.kernel_type == 'linear'):
            w_full = np.zeros((self.d + 1, 1))[:, 0]
            w_full[0] = self.b
            w_full[1:] = self.theta
            x_1_grid = np.linspace(np.min(self.x[:, 0]), np.max(self.x[:, 0]),
                                   100)
            x_2_grid = -1.0 / w_full[2] * (w_full[0] + w_full[1] * x_1_grid)
            plt.plot(x_1_grid, x_2_grid, color='g', label=' = bdry')

        elif self.a is not None:
            plotDecisionBoundary(self.x,
                                 self.y,
                                 self.predictorFunction,
                                 0,
                                 title="")

        plt.xlabel(r'$x_1$')
        plt.ylabel(r'$x_2$')
        plt.legend(loc='best')
        plt.show()

        plt.show()
Exemple #11
0
                                                        C=10**20)
# Second baseline: L2-regularised logistic regression with moderate
# regularisation (C=1); the L1 model above uses a huge C (~no regularisation).
L2_logistic_regressor = linear_model.LogisticRegression(penalty='l2',
                                                        tol=0.001,
                                                        C=1)

L2_logistic_regressor.fit(X, Y)
L1_logistic_regressor.fit(X, Y)

# Only the L1 model is wrapped into a plotting predictor below.
predictor = create_Logistic_predictor(L1_logistic_regressor)

##Define the predictLR(x) function, which uses trained parameters
# def predictLR(x, regressor = L2_logistic_regressor):
# 	return regressor.predict

# plot training results
plotDecisionBoundary(X, Y, predictor, [0.5], title='LR Train')
pl.show()

# print '======Validation======'
# # load data from csv files
# validate = loadtxt('../data/data'+name+'_validate.csv')
# X_v = validate[:,0:2]
# Y_v = validate[:,2:3]

# # plot validation results
# plotDecisionBoundary(X_v, Y_v, predictor, [0.5], title = 'LR Validate')
# pl.show()

# print '======Test======'
# # load data from csv files
# test = loadtxt('../data/data'+name+'_test.csv')
Exemple #12
0
# Data-set selector for the data/dataN_{train,validate,test}.csv files.
dataset_id = "1"
train = np.loadtxt('data/data' + dataset_id + '_train.csv')
x_train, y_train = train[:, 0:2], train[:, 2:3]
test = np.loadtxt('data/data' + dataset_id + '_test.csv')
x_test, y_test = test[:, 0:2], test[:, 2:3]
val = np.loadtxt('data/data' + dataset_id + '_validate.csv')
x_val, y_val = val[:, 0:2], val[:, 2:3]

# Sweep the regularisation weight lambda over 2e-1 ... 2e-10.
x_axis = [2 * 10**-i for i in range(1, 11)]
y_axis = []
for L in x_axis:
    svm = pLSVM(L=L)
    # NOTE(review): fit()'s return value is plotted as 'geometric margin'
    # below - presumably pLSVM.fit returns the margin; verify.
    y_axis += [svm.fit(x_train, y_train)]
    print("pLSVM train", 1.0 - svm.score(x_train, y_train))
    print("pLSVM val", 1.0 - svm.score(x_val, y_val))
    print("pLSVM test", 1.0 - svm.score(x_test, y_test))
    print("")
    plotDecisionBoundary(x_train,
                         y_train,
                         svm.predictOne, [-1, 0, 1],
                         title='pLSVM')

plt.figure()
plt.plot(x_axis, y_axis)
plt.xlabel(r"$\lambda$")
plt.ylabel('geometric margin')
plt.xlim(0, 0.2)
plt.tight_layout()
plt.show()
Exemple #13
0
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    model.fit(x, y)

print(y)
print(model.predict(x))


# Plot the decision region of each softmax output column in turn;
# predictOne is deliberately redefined (shadowed) before each plot.
def predictOne(x_i):
    # Softmax output for class 0 on a single sample.
    output = model.predict(np.array([x_i]))
    return output[0, 0]


plotDecisionBoundary(x, data[:, 2], predictOne, [0.5])


def predictOne(x_i):
    # Softmax output for class 1 on a single sample.
    output = model.predict(np.array([x_i]))
    return output[0, 1]


plotDecisionBoundary(x, data[:, 2], predictOne, [0.5])


def predictOne(x_i):
    # Softmax output for class 2 on a single sample.
    output = model.predict(np.array([x_i]))
    return output[0, 2]