Example #1
def dataset3Params(X, y, Xval, yval):
    """   
    Instructions
    ------------
    Fill in this function to return the optimal C and sigma learning 
    parameters found using the cross validation set.
    You can use `svmPredict` to predict the labels on the cross
    validation set. For example, 
    
        predictions = svmPredict(model, Xval)

    will return the predictions on the cross validation set.

    """
    # You need to return the following variables correctly.
    C = 1
    sigma = 0.3

    # ====================== YOUR CODE HERE ======================
    testvalues = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    best_error = 10000

    for C_i in testvalues:
        for sigma_j in testvalues:
            predictions = utils.svmPredict(
                utils.svmTrain(X, y, C_i, gaussianKernel, args=(sigma_j, )),
                Xval)
            error = np.mean(predictions != yval)
            if error < best_error:
                best_error = error
                C = C_i
                sigma = sigma_j

    # ============================================================
    return C, sigma
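For reference, a minimal sketch of how a `dataset3Params` implementation like the one above is typically driven, assuming the exercise's `ex6data3.mat` data file (which also stores the cross-validation split) and the `utils`/`gaussianKernel` helpers used throughout these examples:

import os
from scipy.io import loadmat

# Load the third example dataset; 'Data/ex6data3.mat' is the path used in the exercise
data = loadmat(os.path.join('Data', 'ex6data3.mat'))
X, y = data['X'], data['y'][:, 0]
Xval, yval = data['Xval'], data['yval'][:, 0]

# Grid-search C and sigma on the validation set, then train the final RBF-kernel SVM
C, sigma = dataset3Params(X, y, Xval, yval)
model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma,))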
Example #2
def dataset3Params(X, y, Xval, yval):
    # You need to return the following variables correctly.
    c_final = 1
    sigma_final = 0.3

    test_vals = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    lowest_error = np.inf

    for c in test_vals:
        for s in test_vals:
            model = utils.svmTrain(X, y, c, gaussianKernel, args=(s,))
            predictions = utils.svmPredict(model, Xval)
            error = np.mean(predictions != yval)
            if error < lowest_error:
                lowest_error = error
                c_final = c
                sigma_final = s

    return c_final, sigma_final
Example #3
def predict_api():
    data = request.get_json(force=True)
    final_features = ''.join(list(data.values()))
    word_indices = processEmail(final_features, verbose=False)
    x = emailFeatures(word_indices)
    prediction = utils.svmPredict(model, x)
    output = 'spam' if prediction else 'not spam'
    return jsonify(output)
Example #4
def predict():
    int_features = [x for x in request.form.values()]
    final_features = ''.join(int_features)
    word_indices = processEmail(final_features, verbose=False)
    x = emailFeatures(word_indices)
    prediction = utils.svmPredict(model, x)
    output = 'spam' if prediction else 'not spam'
    return render_template('index.html',
                           prediction_text='This email is {}'.format(output))
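Examples #3 and #4 are Flask view functions; the application object, route registration, and the trained `model` they rely on are not shown. A rough sketch of that wiring, in which the route paths, the pickle file name, and the use of pickle are assumptions rather than part of the original snippets:

import pickle
from flask import Flask, request, jsonify, render_template  # used by the handlers above

app = Flask(__name__)

# Assumption: a model produced by utils.svmTrain on the spam training set was
# pickled beforehand; 'spam_model.pkl' is a hypothetical file name.
with open('spam_model.pkl', 'rb') as fid:
    model = pickle.load(fid)

# Register the two handlers defined above; the URL rules are assumptions.
app.add_url_rule('/predict_api', view_func=predict_api, methods=['POST'])
app.add_url_rule('/predict', view_func=predict, methods=['POST'])

if __name__ == '__main__':
    app.run(debug=True)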
Example #5
def dataset3Params(X, y, Xval, yval):
    C = 0
    sigma = 0
    correctness = 0
    grid_list = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    for C_i in grid_list:
        for sigma_i in grid_list:
            model_i = utils.svmTrain(X,
                                     y,
                                     C_i,
                                     gaussianKernel,
                                     args=(sigma_i, ))
            predictions_i = utils.svmPredict(model_i, Xval)
            correctness_i = np.mean(predictions_i == yval)
            if correctness_i > correctness:
                C = C_i
                sigma = sigma_i
                correctness = correctness_i
            print(C, sigma, correctness_i)
    return C, sigma
Example #6
def dataset3Params(X, y, Xval, yval):
    # You need to return the following variables correctly.
    C = 1
    sigma = 0.3

    # ====================== YOUR CODE HERE ======================
    c = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    s = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    best_prediction = 100000000.0
    for C in c:
        for sigma in s:
            model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma, ))
            predictions = utils.svmPredict(model, Xval)
            current_prediction = np.mean(predictions != yval)
            if current_prediction < best_prediction:
                best_prediction = current_prediction
                final_C = C
                final_sigma = sigma

    # ============================================================
    #return C, sigma
    return final_C, final_sigma
Example #7
def dataset3Params(X, y, Xval, yval):
    # You need to return the following variables correctly.
    # C = 1
    # sigma = 0.3

    number_vec = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    predicts = []

    for i in range(len(number_vec)):
        for j in range(len(number_vec)):
            C = number_vec[i]
            sigma = number_vec[j]
            model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma, ))
            predictions = utils.svmPredict(model, Xval)
            err_cv = np.mean(predictions != yval)
            temp_tuple = (C, sigma, err_cv)
            predicts.append(temp_tuple)

    # print(len(predicts))
    tam = sorted(predicts, key=lambda tup: tup[2])
    # print(tam[0])
    # return C, sigma
    return tam[0][0], tam[0][1]
Example #8
def dataset3Params(X, y, Xval, yval):
    """
    Returns your choice of C and sigma for Part 3 of the exercise
    where you select the optimal (C, sigma) learning parameters to use for SVM
    with RBF kernel.

    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples, and
        n is the number of features.

    y : array_like
        (m, ) vector of labels for the training data.

    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation examples
        and n is the number of features

    yval : array_like
        (mv, ) vector of labels for the validation data.

    Returns
    -------
    C, sigma : float, float
        The best performing values for the regularization parameter C and
        RBF parameter sigma.

    Instructions
    ------------
    Fill in this function to return the optimal C and sigma learning
    parameters found using the cross validation set.
    You can use `svmPredict` to predict the labels on the cross
    validation set. For example,

        predictions = svmPredict(model, Xval)

    will return the predictions on the cross validation set.

    Note
    ----
    You can compute the prediction error using

        np.mean(predictions != yval)
    """
    # You need to return the following variables correctly.
    C = 1
    sigma = 0.3

    # ====================== YOUR CODE HERE ======================

    # Range of C and sigma values to be tested.
    C_list = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    sigma_list = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    best_C = 0.01
    best_sigma = 0.01

    # First iteration to simply set best_error.
    model = utils.svmTrain(X, y, best_C, gaussianKernel, args=(best_sigma,))
    predictions = utils.svmPredict(model, Xval)
    best_error = np.mean(predictions != yval)

    # Iterate through all possible training scenarios using each
    # C and sigma value. Save the optimal values based on lowest
    # error value and return them.
    for C in C_list:
        for sigma in sigma_list:

            model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma,))
            predictions = utils.svmPredict(model, Xval)
            error = np.mean(predictions != yval)

            if error < best_error:
                best_error = error
                best_C = C
                best_sigma = sigma

    # ============================================================
    return best_C, best_sigma
Example #9
# -----------------------------------------------------------------------------------------------------

# Load the Spam Email dataset
# You will have X, y in your environment
data = loadmat(os.path.join('Data', 'spamTrain.mat'))
X, y = data['X'].astype(float), data['y'][:, 0]

print('Training Linear SVM (Spam Classification)')
print('This may take 1 to 2 minutes ...\n')

C = 0.1
model = utils.svmTrain(X, y, C, utils.linearKernel)

# Compute the training accuracy
p = utils.svmPredict(model, X)

print('Training Accuracy: %.2f' % (np.mean(p == y) * 100))

# Load the test dataset
# You will have Xtest, ytest in your environment
data = loadmat(os.path.join('Data', 'spamTest.mat'))
Xtest, ytest = data['Xtest'].astype(float), data['ytest'][:, 0]

print('Evaluating the trained Linear SVM on a test set ...')
p = utils.svmPredict(model, Xtest)

print('Test Accuracy: %.2f' % (np.mean(p == ytest) * 100))

# Sort the weights and obtain the vocabulary list
# NOTE: some words have the same weights, so the order of tied words is not unique.
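The example stops just before the weights are actually inspected. A minimal sketch of that step, assuming the exercise's `utils.getVocabList` helper and the `'w'` entry of the model dict returned by `utils.svmTrain`:

# Sort the learned weights and map the largest ones back to vocabulary words
# (getVocabList and model['w'] are assumptions based on the exercise's helper module).
idx = np.argsort(model['w'])
top_idx = idx[-15:][::-1]
vocabList = utils.getVocabList()

print('Top predictors of spam:')
for i in top_idx:
    print(' %-15s (%f)' % (vocabList[i], model['w'][i]))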
Example #10
def dataset3Params(X, y, Xval, yval):
    """
    Returns your choice of C and sigma for Part 3 of the exercise 
    where you select the optimal (C, sigma) learning parameters to use for SVM
    with RBF kernel.
    
    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples, and 
        n is the number of features.
    
    y : array_like
        (m, ) vector of labels for the training data.
    
    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation examples
        and n is the number of features
    
    yval : array_like
        (mv, ) vector of labels for the validation data.
    
    Returns
    -------
    C, sigma : float, float
        The best performing values for the regularization parameter C and 
        RBF parameter sigma.
    
    Instructions
    ------------
    Fill in this function to return the optimal C and sigma learning 
    parameters found using the cross validation set.
    You can use `svmPredict` to predict the labels on the cross
    validation set. For example, 
    
        predictions = svmPredict(model, Xval)

    will return the predictions on the cross validation set.
    
    Note
    ----
    You can compute the prediction error using 
    
        np.mean(predictions != yval)
    """
    # You need to return the following variables correctly.
    C = 1
    sigma = 0.3

    # ====================== YOUR CODE HERE ======================

    C_array = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
    sigma_array = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
    err_array = np.zeros([C_array.size, sigma_array.size])

    for i in np.arange(C_array.size):
        for j in np.arange(sigma_array.size):
            model = utils.svmTrain(X,
                                   y,
                                   C_array[i],
                                   gaussianKernel,
                                   args=(sigma_array[j], ))
            predictions = utils.svmPredict(model, Xval)
            pred_error = np.mean(predictions != yval)
            err_array[i, j] = pred_error

    ind = np.unravel_index(np.argmin(err_array, axis=None), err_array.shape)
    C = C_array[ind[0]]
    sigma = sigma_array[ind[1]]

    # ============================================================
    return C, sigma
Example #11
def dataset3Params(X, y, Xval, yval):
    """
    Returns your choice of C and sigma for Part 3 of the exercise 
    where you select the optimal (C, sigma) learning parameters to use for SVM
    with RBF kernel.
    
    Parameters
    ----------
    X : array_like
        (m x n) matrix of training data where m is number of training examples, and 
        n is the number of features.
    
    y : array_like
        (m, ) vector of labels for the training data.
    
    Xval : array_like
        (mv x n) matrix of validation data where mv is the number of validation examples
        and n is the number of features
    
    yval : array_like
        (mv, ) vector of labels for the validation data.
    
    Returns
    -------
    C, sigma : float, float
        The best performing values for the regularization parameter C and 
        RBF parameter sigma.
    
    Instructions
    ------------
    Fill in this function to return the optimal C and sigma learning 
    parameters found using the cross validation set.
    You can use `svmPredict` to predict the labels on the cross
    validation set. For example, 
    
        predictions = svmPredict(model, Xval)

    will return the predictions on the cross validation set.
    
    Note
    ----
    You can compute the prediction error using 
    
        np.mean(predictions != yval)
    """
    # You need to return the following variables correctly.
    C = 1
    sigma = 0.3
    error = 10**6
    # ====================== YOUR CODE HERE ======================
    for i in [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]:
        for j in [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]:
            model = utils.svmTrain(X, y, i, gaussianKernel, args=(j, ))
            predictions = utils.svmPredict(model, Xval)
            temp = np.mean(predictions != yval)
            if temp < error:
                error = temp
                C = i
                sigma = j
    # ============================================================
    return C, sigma
#  Since the model we are training is a linear SVM, we can inspect the
#  weights learned by the model to understand better how it is determining
#  whether an email is spam or not. The following code finds the words with
#  the highest weights in the classifier. Informally, the classifier
#  'thinks' that these words are the most likely indicators of spam.
#

## =================== Part 6: Try Your Own Emails =====================
#  Now that you've trained the spam classifier, you can use it on your own
#  emails! In the starter code, we have included spamSample1.txt,
#  spamSample2.txt, emailSample1.txt and emailSample2.txt as examples.
#  The following code reads in one of these emails and then uses your
#  learned SVM classifier to determine whether the email is Spam or
#  Not Spam

# filenames = ['emailSample2.txt', 'emailSample1.txt', 'spamSample2.txt', 'spamSample1.txt']

# for filename in filenames:
#     with open(os.path.join('Data', filename)) as fid: file_contents = fid.read()
#     word_indices  = processEmail(file_contents, False)
#     x = emailFeatures(word_indices)
#     # print(x.shape)

#     prediction = utils.svmPredict(model, x)
#     print('\nProcessed {}\n\nSpam Classification: {}\n'.format(filename, prediction))
#     print('(1 indicates spam, 0 indicates not spam)\n\n')

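# `tam` here is assumed to be the feature vector of a custom test email, built with
# processEmail/emailFeatures in the same way as the commented-out loop above.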
prediction = utils.svmPredict(model, tam)
print('\nProcessed {}\n\nSpam Classification: {}\n'.format(
    "lam test", prediction))
print('(1 indicates spam, 0 indicates not spam)\n\n')