def dataset3Params(X, y, Xval, yval):
    """
    Pick the (C, sigma) pair with the lowest validation error.

    One Gaussian-kernel SVM is trained with ``utils.svmTrain`` on (X, y) for
    every combination of C and sigma taken from a fixed grid. Each model is
    scored on (Xval, yval) with ``np.mean(predictions != yval)`` and the
    first combination reaching the lowest score is returned.

    Parameters
    ----------
    X, y :
        Training inputs and labels, forwarded unchanged to ``utils.svmTrain``.
    Xval, yval :
        Validation inputs (forwarded to ``utils.svmPredict``) and the labels
        the predictions are compared against.

    Returns
    -------
    c_final, sigma_final :
        The selected grid values. The defaults (1, 0.3) are returned only if
        no candidate ever compares below the initial ``np.inf`` error.
    """
    # Defaults, overwritten by the first candidate with a finite error.
    c_final = 1
    sigma_final = 0.3

    test_vals = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    lowest_error = np.inf

    for c in test_vals:
        for s in test_vals:
            # sigma has to travel as a one-element tuple through the ``args``
            # keyword. A bare ``(s)`` is just the float ``s``; passed
            # positionally it fills the parameter that follows the kernel
            # function (presumably a tolerance -- TODO confirm against
            # utils.svmTrain) and never reaches the kernel.
            model = utils.svmTrain(X, y, c, gaussianKernel, args=(s,))
            predictions = utils.svmPredict(model, Xval)
            error = np.mean(predictions != yval)

            # Strict comparison keeps the earliest candidate on ties.
            if error < lowest_error:
                lowest_error = error
                c_final = c
                sigma_final = s

    return c_final, sigma_final
コード例 #2
0
def dataset3Params(X, y, Xval, yval):
    """
    Grid-search the SVM parameters C and sigma on the cross validation set.

    Every (C, sigma) combination from a fixed list of values is used to
    train a Gaussian-kernel SVM on (X, y). The model is then scored by the
    fraction of validation predictions that differ from ``yval``, and the
    first combination with the lowest score wins.

    Returns
    -------
    C, sigma :
        The selected values, or the defaults (1, 0.3) when no candidate
        scores below the initial sentinel error.
    """
    grid = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    # Defaults that survive only if nothing beats the sentinel below.
    C, sigma = 1, 0.3
    lowest = 10000

    # C varies slowest, sigma fastest.
    candidates = [(c_try, s_try) for c_try in grid for s_try in grid]

    for c_try, s_try in candidates:
        model = utils.svmTrain(X, y, c_try, gaussianKernel, args=(s_try, ))
        predictions = utils.svmPredict(model, Xval)
        miss_rate = np.mean(predictions != yval)

        # Only a strictly better score replaces the current best.
        if not miss_rate < lowest:
            continue
        lowest, C, sigma = miss_rate, c_try, s_try

    return C, sigma
コード例 #3
0
def dataset3Params(X, y, Xval, yval):
    """
    Choose C and sigma by maximising accuracy on the validation set.

    Trains a Gaussian-kernel SVM for each (C, sigma) pair from a fixed grid
    and scores it by the fraction of validation predictions equal to
    ``yval``. After every candidate the current best pair and the
    candidate's accuracy are printed.

    Returns
    -------
    C, sigma :
        The first pair reaching the highest accuracy, or (0, 0) when no
        candidate achieves an accuracy above zero.
    """
    candidates = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    best_c, best_sigma, best_accuracy = 0, 0, 0

    # C varies slowest, sigma fastest.
    for c_try, sigma_try in ((c, s) for c in candidates for s in candidates):
        svm = utils.svmTrain(X, y, c_try, gaussianKernel, args=(sigma_try, ))
        accuracy = np.mean(utils.svmPredict(svm, Xval) == yval)

        # Strict comparison: ties keep the earlier pair.
        if accuracy > best_accuracy:
            best_c, best_sigma, best_accuracy = c_try, sigma_try, accuracy

        # Progress output: best pair so far plus this candidate's accuracy.
        print(best_c, best_sigma, accuracy)

    return best_c, best_sigma
コード例 #4
0
def dataset3Params(X, y, Xval, yval):
    """
    Select the (C, sigma) pair with the lowest validation error.

    A Gaussian-kernel SVM is trained on (X, y) for every combination of the
    grid values below. Each model is scored with
    ``np.mean(predictions != yval)`` on the validation data.

    Returns
    -------
    final_C, final_sigma :
        The first pair, in grid order, reaching the lowest error. Falls back
        to the defaults (1, 0.3) if no candidate compares below the initial
        ``np.inf`` error.
    """
    # Initialise the results up front so the function always has something
    # to return, even if no comparison below ever succeeds.
    final_C = 1
    final_sigma = 0.3

    # An ordered list rather than a set: set iteration follows hash order,
    # which would make tie-breaking between equal errors non-obvious.
    grid = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    best_error = np.inf

    for C in grid:
        for sigma in grid:
            model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma, ))
            predictions = utils.svmPredict(model, Xval)
            current_error = np.mean(predictions != yval)

            # Strict comparison keeps the earliest pair on ties.
            if current_error < best_error:
                best_error = current_error
                final_C = C
                final_sigma = sigma

    return final_C, final_sigma
コード例 #5
0
def dataset3Params(X, y, Xval, yval):
    """
    Return the (C, sigma) pair with the lowest validation error.

    Every combination of the grid values is used to train a Gaussian-kernel
    SVM on (X, y). The resulting ``(C, sigma, error)`` triples are collected
    and the one with the smallest error is selected, where error is
    ``np.mean(predictions != yval)``.

    Returns
    -------
    C, sigma :
        The grid values of the first triple with the minimal error.
    """
    number_vec = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    results = []

    # Iterate over the values directly instead of indexing by position.
    for C in number_vec:
        for sigma in number_vec:
            model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma, ))
            predictions = utils.svmPredict(model, Xval)
            err_cv = np.mean(predictions != yval)
            results.append((C, sigma, err_cv))

    # min() returns the first minimal entry, which is what taking element 0
    # of a stable sort did, without sorting the whole list.
    best_C, best_sigma, _ = min(results, key=lambda entry: entry[2])
    return best_C, best_sigma
コード例 #6
0
def dataset3Params(X, y, Xval, yval):
    """
    Returns your choice of C and sigma for Part 3 of the exercise
    where you select the optimal (C, sigma) learning parameters to use for SVM
    with RBF kernel.

    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples, and
        n is the number of features.

    y : array_like
        (m, ) vector of labels for the training data.

    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation examples
        and n is the number of features

    yval : array_like
        (mv, ) vector of labels for the validation data.

    Returns
    -------
    C, sigma : float, float
        The best performing values for the regularization parameter C and
        RBF parameter sigma. If no candidate ever compares below the initial
        error, the first grid point (0.01, 0.01) is returned.

    Note
    ----
    The prediction error of each candidate is computed as

        np.mean(predictions != yval)
    """
    # Range of C and sigma values to be tested.
    C_list = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    sigma_list = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    # Start from the first grid point with an infinite error. The loop below
    # visits that point anyway, so training it separately beforehand would
    # only repeat the work.
    best_C = 0.01
    best_sigma = 0.01
    best_error = np.inf

    # Train one model per (C, sigma) combination and remember the pair with
    # the lowest validation error. Ties keep the earlier pair.
    for C in C_list:
        for sigma in sigma_list:
            model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma,))
            predictions = utils.svmPredict(model, Xval)
            error = np.mean(predictions != yval)

            if error < best_error:
                best_error = error
                best_C = C
                best_sigma = sigma

    return best_C, best_sigma
コード例 #7
0
# -------------------------- Testing Gaussian Kernel --------------------------------------
# This section fits a linear-kernel SVM on ex6data1 and draws its boundary,
# then evaluates gaussianKernel on one fixed pair of vectors and hands the
# function to the grader.

# Load from ex6data1
# You will have X, y as keys in the dict data
data = loadmat(os.path.join('Data', 'ex6data1.mat'))
# Keep only the first column of 'y' so the labels are a 1-D vector.
X, y = data['X'], data['y'][:, 0]

# Plot training data
utils.plotData(X, y)
#pyplot.show()

# You should try to change the C value below and see how the decision
# boundary varies (e.g., try C = 1000)
C = 1

# The two trailing positional arguments (1e-3 and 20) are passed straight to
# utils.svmTrain; presumably tolerance and maximum passes -- TODO confirm.
model = utils.svmTrain(X, y, C, utils.linearKernel, 1e-3, 20)
utils.visualizeBoundaryLinear(X, y, model)
#pyplot.show()

# Fixed inputs for a spot check of gaussianKernel; the expected value is
# quoted in the message printed below.
x1 = np.array([1, 2, 1])
x2 = np.array([0, 4, -1])
sigma = 2

sim = gaussianKernel(x1, x2, sigma)

print('Gaussian Kernel between x1 = [1, 2, 1], x2 = [0, 4, -1], sigma = %0.2f:'
      '\n\t%f\n(for sigma = 2, this value should be about 0.324652)\n' % (sigma, sim))

# Register the implementation under key 1 of the grader and run the grading.
grader[1] = gaussianKernel
grader.grade()
コード例 #8
0
def dataset3Params(X, y, Xval, yval):
    """
    Select C and sigma for the RBF-kernel SVM from a validation error table.

    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples, and
        n is the number of features.

    y : array_like
        (m, ) vector of labels for the training data.

    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation examples
        and n is the number of features

    yval : array_like
        (mv, ) vector of labels for the validation data.

    Returns
    -------
    C, sigma : float, float
        The grid values at the position of the smallest entry of the error
        table (the first such position when several entries are equal).

    Note
    ----
    Each table entry is the prediction error

        np.mean(predictions != yval)

    of the model trained with the corresponding (C, sigma) pair.
    """
    c_grid = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
    sigma_grid = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])

    # Rows follow c_grid, columns follow sigma_grid.
    errors = np.zeros((c_grid.size, sigma_grid.size))

    for row, c_value in enumerate(c_grid):
        for col, sigma_value in enumerate(sigma_grid):
            svm = utils.svmTrain(X, y, c_value, gaussianKernel,
                                 args=(sigma_value, ))
            errors[row, col] = np.mean(utils.svmPredict(svm, Xval) != yval)

    # Turn the flat position of the minimum back into (row, column).
    best_row, best_col = np.unravel_index(errors.argmin(), errors.shape)
    return c_grid[best_row], sigma_grid[best_col]
コード例 #9
0
    features = emailFeatures(word_indices)

    # Print Stats
    print('\nLength of feature vector: %d' % len(features))
    print('Number of non-zero entries: %d' % sum(features > 0))

    # Load the Spam Email dataset
    # You will have X, y in your environment
    data = loadmat(os.path.join('Data', 'spamTrain.mat'))
    X, y = data['X'].astype(float), data['y'][:, 0]

    print('Training Linear SVM (Spam Classification)')
    print('This may take 1 to 2 minutes ...\n')

    C = 0.1
    model = utils.svmTrain(X, y, C, utils.linearKernel)

    # Compute the training accuracy
    p = utils.svmPredict(model, X)

    print('Training Accuracy: %.2f' % (np.mean(p == y) * 100))

    # Load the test dataset
    # You will have Xtest, ytest in your environment
    data = loadmat(os.path.join('Data', 'spamTest.mat'))
    Xtest, ytest = data['Xtest'].astype(float), data['ytest'][:, 0]

    print('Evaluating the trained Linear SVM on a test set ...')
    p = utils.svmPredict(model, Xtest)

    print('Test Accuracy: %.2f' % (np.mean(p == ytest) * 100))
コード例 #10
0
def dataset3Params(X, y, Xval, yval):
    """
    Search a fixed grid for the C and sigma giving the lowest validation error.

    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples, and
        n is the number of features.

    y : array_like
        (m, ) vector of labels for the training data.

    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation examples
        and n is the number of features

    yval : array_like
        (mv, ) vector of labels for the validation data.

    Returns
    -------
    C, sigma : float, float
        The first grid pair reaching the lowest error, or the defaults
        (1, 0.3) when no candidate scores below the initial sentinel.

    Note
    ----
    The error of a candidate is

        np.mean(predictions != yval)

    evaluated on the predictions for the validation set.
    """
    grid = (0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30)

    # Defaults plus a sentinel error that any real error rate undercuts.
    C, sigma = 1, 0.3
    lowest_error = 10**6

    for c_candidate in grid:
        for sigma_candidate in grid:
            svm = utils.svmTrain(X, y, c_candidate, gaussianKernel,
                                 args=(sigma_candidate, ))
            val_predictions = utils.svmPredict(svm, Xval)
            miss_rate = np.mean(val_predictions != yval)

            # Strictly better only, so ties keep the earlier pair.
            if miss_rate < lowest_error:
                lowest_error, C, sigma = miss_rate, c_candidate, sigma_candidate

    return C, sigma
コード例 #11
0
from scipy.io import loadmat
import utils
from scipy import optimize
import re

###===================Part 1.1: Load and  Visualizing Data ============================
# Load the first example dataset and train a linear-kernel SVM on it.
# NOTE(review): the path below is a hard-coded absolute Windows path, so the
# script only runs where that exact directory exists.
data = loadmat(
    "D:/TJH/ML03/machine-learning-ex6/machine-learning-ex6/ex6/ex6data1.mat")
X, y = data["X"], data["y"].ravel()  # labels flattened to 1-D. NOTE(review): an earlier comment claimed X is (12,1) -- unverified

# utils.plotData(X,y)
# plt.show()

# Regularization parameter handed to svmTrain (100 was an alternative tried).
C = 1
# C = 100
# The trailing 1e-3 and 20 are passed positionally to utils.svmTrain;
# presumably tolerance and maximum passes -- TODO confirm.
model = utils.svmTrain(X, y, C, utils.linearKernel, 1e-3, 20)

# utils.visualizeBoundaryLinear(X,y,model)
# plt.show()


###===================Part 1.2.1: SVM with Gaussian Kernels ============================
def gaussianKernel(x1, x2, sigma):
    """
    Return the Gaussian (RBF) similarity between x1 and x2.

    Computes ``exp(-||x1 - x2||^2 / (2 * sigma^2))``.

    Parameters
    ----------
    x1, x2 : array_like
        The two vectors to compare. Lists, tuples and arrays of any numeric
        dtype are accepted; they are converted to float before subtracting.

    sigma : float
        Bandwidth of the kernel.

    Returns
    -------
    sim : float
        The similarity; exactly 1.0 when x1 and x2 are equal.
    """
    # Convert to float first: this lets plain sequences through and stops
    # unsigned-integer arrays from wrapping around on subtraction.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)

    # Sum of squares directly, instead of taking a norm (square root) and
    # squaring it again.
    sq_dist = np.sum(diff * diff)

    return np.exp(-sq_dist / (2 * sigma**2))

コード例 #12
0
    return features_vector


# Load the spam training set; the .mat file is read into a dict from which
# the "X" and "y" entries are taken.
data = loadmat(os.path.join('Data', 'spamTrain.mat'))
X = data["X"]
y = data["y"]

# Flatten the labels to a 1-D vector, then print the first label and the
# first feature row as a quick sanity check.
y = y.flatten()
print(y[0])
tam = X[0]
print(tam)

# sss = X[:5] sss_y = y[:5] print(sss.shape)

# Train the SVM
# Gaussian kernel with C = 3; sigma = 0.1 is forwarded to the kernel via args.
model = utils.svmTrain(X, y, 3, gaussianKernel, args=(0.1, ))

# Disabled alternative: linear kernel plus training-accuracy report.
# model = utils.svmTrain(X, y, 3, utils.linearKernel, args=(0.01,))
# predictions = utils.svmPredict(model, X)
# err_train = np.mean(predictions == y) * 100
# print('Train Accuracy: {} % '.format(err_train))

# Disabled: evaluation on the spam test set.
# data = loadmat(os.path.join('Data', 'spamTest.mat'))
# Xtest = data["Xtest"]
# ytest = data["ytest"]

# ytest = ytest.flatten()

# predictions_test = utils.svmPredict(model, Xtest)
# err_test = np.mean(predictions_test == ytest) * 100
# print('Test Accuracy: {} % '.format(err_test))