Beispiel #1
0
def dataset3_params(X, y, Xval, yval):
    """Grid-search C and sigma for an RBF-kernel SVM on a validation set.

    Every (C, sigma) pair from a fixed candidate grid is used to fit a model
    on (X, y) via ``libsvm.fit``. The pair whose predictions on Xval have the
    lowest misclassification rate against yval is returned; on ties the pair
    tried first is kept.

    Returns:
        Tuple ``(best_C, best_sigma)``.
    """
    # The same candidate values are tried for both C and sigma.
    grid = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    chosen_C, chosen_sigma = grid[0], grid[0]
    # Sentinel larger than any mean of booleans, so the first pair always wins.
    lowest_err = 1000.0
    for c_value in grid:
        for sigma_value in grid:
            # Express the Gaussian width as libsvm's gamma = 1 / (2 * sigma^2).
            gamma = 1.0 / (2.0 * sigma_value ** 2)
            fitted = libsvm.fit(X, y, kernel='rbf', C=c_value, gamma=gamma)
            # The fitted model is a sequence; its entries are passed back to
            # predict by position.
            model_parts = dict(
                support=fitted[0],
                SV=fitted[1],
                nSV=fitted[2],
                sv_coef=fitted[3],
                intercept=fitted[4],
                label=fitted[5],
                probA=fitted[6],
                probB=fitted[7],
            )
            predicted = libsvm.predict(
                Xval, kernel='rbf', gamma=gamma, **model_parts)
            err = np.mean(predicted != yval)
            if err < lowest_err:
                chosen_C, chosen_sigma, lowest_err = c_value, sigma_value, err
    return (chosen_C, chosen_sigma)
def train_data(featureMatrix, labels):
    """Fit a libsvm model on a feature matrix and its labels.

    The matrix is densified with ``toarray()`` and the labels are converted
    to a float64 array before both are handed to ``libsvm.fit``. The fit
    result is returned unchanged.
    """
    # TODO: the features should be turned into something scikit-learn
    # recognises while staying sparse, so memory use stays low. Feature
    # hashing is an open question (what is it and how would it help?).
    dense_features = featureMatrix.toarray()
    float_labels = array(labels, dtype='float64')
    return libsvm.fit(dense_features, float_labels)
    return (arr-mean)/math.sqrt(variance)

# Load the training features and labels from the svmlight-format file.
X, Y = load_svmlight_file(f='train_raw', n_features=16, multilabel=False, zero_based='auto', query_id=False, dtype=np.float64)
X = X.toarray()

# Normalise every row of the training features and of the test features.
# NOTE(review): `data` must already be defined at this point; it is not
# created in this part of the script.
for j in range(len(X)):
    X[j] = normaliseData(X[j])
for j in range(len(data)):
    data[j] = normaliseData(data[j])


# Train the SVM with libsvm.fit using a polynomial kernel of degree 4.
# The remaining hyper-parameters are passed explicitly so that prediction
# below can reuse exactly the same kernel settings.
[support, sv, nsv, coeff, intercept, proba, probb, fit_status] = libsvm.fit(X, Y, svm_type=0, kernel='poly', degree=4,
        gamma=0.093, coef0=0, tol=0.001, C=1, nu=0.5, max_iter=-1, random_seed=0)

# Persist the model components (fit_status is not stored).
m = [support, sv, nsv, coeff, intercept, proba, probb]
save_model('model.pkl', m)

[support_, sv_, nsv_, coeff_, intercept_, proba_, probb_] = load_model('model.pkl')
# Predict on the test data with the reloaded model and the same kernel
# settings that were used for training.
dec_values = libsvm.predict(data, support_, sv_, nsv_, coeff_, intercept_, proba_, probb_, svm_type=0, kernel='poly', degree=4,
                            gamma=0.093, coef0=0)
# astype() returns a new array, so the result has to be rebound; calling it
# as a bare statement left dec_values unchanged.
dec_values = dec_values.astype(int)

# Predictions are written to a csv file named result.csv
j=0
with open('result.csv', 'w') as csvfile:
    spamwriter = csv.writer(csvfile)
    for j in range(0,len(dec_values)):
Beispiel #4
0
# Label spreading with an RBF kernel, fitted on the medium training set.
# NOTE(review): the value returned by fit() is stored under a y_pred_* name
# here and below; presumably it is the fitted estimator rather than
# predictions -- confirm before using it as labels.
lsRBF = LabelSpreading(kernel='rbf')
y_pred_lsRBF = lsRBF.fit(mediumTrainingLst_input, mediumTrainingLst_target)

#%%
# Same two kernels (knn and rbf), fitted on the medium "cluster" training set.
lsKNNC = LabelSpreading(kernel='knn')
y_pred_lsKNNC = lsKNNC.fit(mediumClusterTrainingLst_input,
                           mediumClusterTrainingLst_target)
lsRBFC = LabelSpreading(kernel='rbf')
y_pred_lsRBFC = lsRBFC.fit(mediumClusterTrainingLst_input,
                           mediumClusterTrainingLst_target)
#%%
'''
Libsvm
'''
# libsvm is used through its module-level fit() function on the small
# training set, with all other fit arguments left at their defaults.
# NOTE(review): the result is presumably the fitted model data, not
# predictions, despite the y_pred_ name -- confirm.
#lsvm = libsvm()
y_pred_lsvm = libsvm.fit(np.array(smallTrainingLst_input),
                         np.array(smallTrainingLst_target))


#%%
def svmScore(predictLst, targetLst):
    """Return the fraction of predictions that equal their targets.

    Entries are compared position by position over the length of
    ``predictLst``; the number of matches is divided by that length.
    """
    total = len(predictLst)
    matches = sum(
        1 for idx in range(total) if predictLst[idx] == targetLst[idx]
    )
    return matches / total


# Banner for the scores evaluated on the training data. The Naive Bayes
# printouts that follow are currently commented out.
print('Testing with training data:')
#print('Gaussian Naive Bayes, 100% Data: ', end = '')
#print(gnb.score(trainingLst_input[:100], trainingLst_target[:100]))
#print('Multinomial Naive Bayes, 100% Data: ', end = '')
    def build(self, x_train, y_train, path, **parameter):
        """Fit a libsvm model on pandas training data and keep it on ``self``.

        Args:
            x_train: pandas object holding the training features.
            y_train: pandas object holding the training targets.
            path: Not used by the visible part of this method.
            **parameter: Extra keyword arguments forwarded to ``libsvm.fit``.
        """
        # ``as_matrix()`` was removed in pandas 1.0; ``.values`` yields the
        # same ndarray on both old and new pandas versions.
        # The features are copied into C order and cast to float64 before
        # being passed to libsvm.fit.
        x = x_train.values.copy(order='C').astype(np.float64)
        y = y_train.values.astype(np.float64)

        self.model = libsvm.fit(x, y, **parameter)
Beispiel #6
0

if __name__ == '__main__':
    # Read the sample email inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open('../../octave/mlclass-ex6/emailSample1.txt', 'r') as email_file:
        file_contents = email_file.read()
    vocabulary = get_vocabulary()
    word_indices = process_email(file_contents, vocabulary)
    print('Word indices:\n%s' % word_indices)
    features = email_features(word_indices, vocabulary)
    print('Length of feature vector: %d' % len(features))
    print('Number of non-zero entries: %d' % sum(features > 0))
    # train SVM for spam classification
    data = loadmat('../../octave/mlclass-ex6/spamTrain.mat')
    # The features are requested as a C-contiguous float64 array and the
    # labels as a flattened float64 array before fitting.
    X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
    y = np.require(data['y'].flatten(), dtype=np.float64)
    C = 0.1
    model = libsvm.fit(X, y, kernel='linear', C=C)
    # The fitted model is a sequence; its entries are passed back to predict
    # by position.
    predictions = libsvm.predict(
        X,
        support=model[0],
        SV=model[1],
        nSV=model[2],
        sv_coef=model[3],
        intercept=model[4],
        label=model[5],
        probA=model[6],
        probB=model[7],
        kernel='linear',
    )
    # Accuracy is measured on the same data the model was trained on.
    accuracy = 100 * np.mean(predictions == y)
    print('Training set accuracy: %0.2f %%' % accuracy)
    # load test set
Beispiel #7
0
            err = np.mean(predictions != yval)
            if err < previous_err:
                best_C = C
                best_sigma = sigma
                previous_err = err
    return (best_C, best_sigma)


if __name__ == '__main__':
    # dataset 1: fit a linear SVM and plot its decision boundary
    data = loadmat('../../octave/mlclass-ex6/ex6data1.mat')
    X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
    y = np.require(data['y'].flatten(), dtype=np.float64)
    plot_data(X, y)
    print('Training Linear SVM ...')
    C = 1.0
    model = libsvm.fit(X, y, kernel='linear', tol=0.001, C=C, max_iter=20)
    visualize_boundary_linear(X, y, model)
    # evaluate gaussian kernel
    x1 = np.array([1.0, 2.0, 1.0])
    x2 = np.array([0.0, 4.0, -1.0])
    sigma = 2.0
    value = gaussian_kernel(x1, x2, sigma)
    # Report the sigma that was actually used. The message used to hard-code
    # "sigma = 0.5", which contradicted the value above; assuming the standard
    # Gaussian kernel, the expected 0.324652 is exp(-9/8), i.e. sigma = 2.
    print(
        'Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = %0.1f: %f'
        % (sigma, value))
    print('(this value should be about 0.324652)')
    # dataset 2
    data = loadmat('../../octave/mlclass-ex6/ex6data2.mat')
    X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
    y = np.require(data['y'].flatten(), dtype=np.float64)
    plot_data(X, y)
Beispiel #8
0
# The original email was processed using the processEmail and emailFeatures
# functions and converted into a vector x(i) with a size of 1899
# (4000x1899 for spamTrain.mat, 1000x1899 for spamTest.mat).

# Raw string: the Windows path contains backslashes that would otherwise be
# read as (invalid) escape sequences such as "\M" or "\s".
svmtrain = loadmat(
    r'D:\ML\ML\CSR ML\WEEK#7\Machine Learning Assignment#6\Python\spamTrain.mat'
)
X = np.require(svmtrain['X'], dtype=np.float64, requirements='C')  # C-contiguous float64 features
#print(X.flags)
y = np.require(svmtrain['y'].flatten(), dtype=np.float64)  # labels flattened to 1-D

print('Training Linear SVM (Spam Classification)')
print('(this may take 1 to 2 minutes) ...')

C = 0.1
model = libsvm.fit(X, y, kernel='linear', C=C)
# The fitted model is a sequence; its entries are passed back to predict by
# position.
p = libsvm.predict(X,
                   support=model[0],
                   SV=model[1],
                   nSV=model[2],
                   sv_coef=model[3],
                   intercept=model[4],
                   probA=model[5],
                   probB=model[6],
                   kernel='linear')

# Accuracy is measured on the same data the model was trained on.
accuracyTrain = np.mean(p == y) * 100
print('Training accuracy', accuracyTrain)

# Test Spam Classification
svmtest = loadmat(
Beispiel #9
0
            err = np.mean(predictions != yval)
            if err < previous_err:
                best_C = C
                best_sigma = sigma
                previous_err = err
    return (best_C, best_sigma)


if __name__ == '__main__':
    # dataset 1: fit a linear SVM and plot its decision boundary
    data = loadmat('../../octave/mlclass-ex6/ex6data1.mat')
    X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
    y = np.require(data['y'].flatten(), dtype=np.float64)
    plot_data(X, y)
    print('Training Linear SVM ...')
    C = 1.0
    model = libsvm.fit(X, y, kernel='linear', tol=0.001, C=C, max_iter=20)
    visualize_boundary_linear(X, y, model)
    # evaluate gaussian kernel
    x1 = np.array([1.0, 2.0, 1.0])
    x2 = np.array([0.0, 4.0, -1.0])
    sigma = 2.0
    value = gaussian_kernel(x1, x2, sigma)
    # Report the sigma that was actually used. The message used to hard-code
    # "sigma = 0.5", which contradicted the value above; assuming the standard
    # Gaussian kernel, the expected 0.324652 is exp(-9/8), i.e. sigma = 2.
    print('Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = %0.1f: %f' % (sigma, value))
    print('(this value should be about 0.324652)')
    # dataset 2
    data = loadmat('../../octave/mlclass-ex6/ex6data2.mat')
    X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
    y = np.require(data['y'].flatten(), dtype=np.float64)
    plot_data(X, y)
    print('Training SVM with RBF Kernel ...')
    C = 1.0