コード例 #1
0
def learn_batch_performance_params (m_val):
    """Tune and evaluate perceptron and winnow on dataset *m_val*.

    The testing file is split into D1/D2; hyper-parameters (gamma, eta)
    are searched on D1 and validated on D2.  Each algorithm (with and
    without margin) is then retrained on the training file and its
    accuracy on the full testing set is printed.

    m_val -- index into batch_performance_datasets selecting the
             (testing_file, training_file) pair.
    """
    testing_file = batch_performance_datasets[m_val][0]
    training_file = batch_performance_datasets[m_val][1]

    testing_set = file_read (testing_file)
    training_set = file_read (training_file)

    # D1 drives the parameter search, D2 validates the chosen parameters.
    partition = partition_data (testing_set)
    D1 = partition.training
    D2 = partition.testing

    def _test_accuracy (mistake_counts):
        # The final entry of the mistake list is the total mistake count
        # over the whole testing set.
        return 1.0 - float(mistake_counts[-1]) / len (testing_set)

    # ---- Perceptron ---------------------------------------------------
    perceptron_params = perceptron_learn_batch_performance_params (D1, D2)
    perceptron_params_without_margin = perceptron_params.get_params (0)
    perceptron_params_with_margin = perceptron_params.get_params (1)
    print('perceptron without margin acc(D2): ' + str(perceptron_params_without_margin.accuracy))
    print('perceptron with margin acc(D2): ' + str(perceptron_params_with_margin.accuracy))

    print('\n\nRunning perceptron on Test set...')

    perceptron_trained_without_margin = perceptron_train (batch_performance_n, training_set, perceptron_params_without_margin.gamma, perceptron_params_without_margin.eta)
    perceptron_trained_with_margin = perceptron_train (batch_performance_n, training_set, perceptron_params_with_margin.gamma, perceptron_params_with_margin.eta)
    perceptron_mistakes_without_margin = perceptron_test (testing_set, perceptron_trained_without_margin[0], perceptron_trained_without_margin[1])
    perceptron_mistakes_with_margin = perceptron_test (testing_set, perceptron_trained_with_margin[0], perceptron_trained_with_margin[1])
    # Typo fix: messages previously read 'perecptron'.
    print('perceptron without margin acc(Test): ' + str(_test_accuracy(perceptron_mistakes_without_margin)))
    print('perceptron with margin acc(Test): ' + str(_test_accuracy(perceptron_mistakes_with_margin)))

    # ---- Winnow -------------------------------------------------------
    winnow_params = winnow_learn_batch_performance_params (D1, D2)
    winnow_params_without_margin = winnow_params.get_params (0)
    winnow_params_with_margin = winnow_params.get_params (1)
    print('winnow without margin acc(D2): ' + str(winnow_params_without_margin.accuracy))
    print('winnow with margin acc(D2): ' + str(winnow_params_with_margin.accuracy))

    print('\n\nRunning winnow on Test set...')

    # BUG FIX: the without-margin run previously trained with the
    # with-margin eta (winnow_params_with_margin.eta).
    winnow_trained_without_margin = winnow_train (batch_performance_n, training_set, winnow_params_without_margin.gamma, winnow_params_without_margin.eta)
    winnow_trained_with_margin = winnow_train (batch_performance_n, training_set, winnow_params_with_margin.gamma, winnow_params_with_margin.eta)
    winnow_mistakes_without_margin = winnow_test (testing_set, winnow_trained_without_margin)
    winnow_mistakes_with_margin = winnow_test (testing_set, winnow_trained_with_margin)
    print('winnow without margin acc(Test): ' + str(_test_accuracy(winnow_mistakes_without_margin)))
    print('winnow with margin acc(Test): ' + str(_test_accuracy(winnow_mistakes_with_margin)))
コード例 #2
0
ファイル: perc_test.py プロジェクト: YifengQ/Machine-Learning
import perceptron as P
import numpy as np
# Toggle for all console output produced by these manual checks.
print_stuff = True

if print_stuff:
    print("\n\n\nPerceptron tests:")
# Small 2-D sample; labels are +/-1 column vectors.
Perc_X_train = np.array([[0, 1], [1, 0], [5, 4], [1, 1], [3, 3], [2, 4], [1, 6]])
Perc_Y_train = np.array([[1], [1], [-1], [1], [-1], [-1], [-1]])
# Train, then score the learned (w, b) on the same points it was fit on.
[w, b] = P.perceptron_train(Perc_X_train, Perc_Y_train)
perc_test = P.perceptron_test(Perc_X_train, Perc_Y_train, w, b)
if print_stuff:
    print("W from sample =", w, "B from sample =", b)
    print("Test on self form sample: ", perc_test)
    print("Testing for non-linearly seperable data")
# Larger sample that the script itself labels as non-linearly separable.
Perc_stuff_X = np.array(
    [[1, 0], [7, 4], [9, 6], [2, 1], [4, 8], [0, 3], [13, 5], [6, 8], [7, 3], [3, 6], [2, 1], [8, 3], [10, 2],
     [3, 5], [5, 1], [1, 9], [10, 3], [4, 1], [6, 6], [2, 2]])
Perc_stuff_Y = np.array(
    [[1], [1], [-1], [1], [-1], [-1], [-1], [1], [1], [-1], [1], [-1], [-1], [-1], [1], [1], [-1], [1], [-1], [-1]])
# Same train/self-test cycle on the harder sample.
[w, b] = P.perceptron_train(Perc_stuff_X, Perc_stuff_Y)
someTest = P.perceptron_test(Perc_stuff_X, Perc_stuff_Y, w, b)
if print_stuff:
    print("Non-linearly seperable data test w=", w, "b=", b, "Accuracy on self =", someTest)
Perc_random_X = np.array([[1.84724509, 2.23182926],
                          [1.22695894, 1.6611229],
                          [2.13212121, 4.63313796],
                          [7.78081405, 4.11930532],
                          [7.28450063, 3.90368111],
                          [1.29216053, 2.76912245],
                          [7.0384763, 2.80881342],
                          [1.22081714, 3.80955021],
コード例 #3
0
# Plot the K=3 cluster centers over the X_2 samples.
print("C_3: \n", C_3)
plt.scatter(C_3[:,0], C_3[:,1], label='centers')
plt.scatter(X_2[:,0], X_2[:,1], label='samples')
plt.title('X_2, K=3')
# plt.savefig("k_means_results_3.png")  #Uncomment to save plot as file
plt.show()


# PERCEPTRON TESTING
# Typo fixes in the output below: 'Accurancy' -> 'Accuracy',
# 'Preceptron' -> 'Perceptron'.

# Hand-Tested Data
X = np.array( [[1,1], [1,-1], [-1,1], [-1,-1]] )
Y = np.array( [[1], [-1], [-1], [-1]] )
W = p.perceptron_train(X,Y)
print("Hand-Tested Data    W1: ",W[0][0],"  W2: ",W[0][1],"  b:",W[1][0])
test_acc = p.perceptron_test(X,Y,W[0],W[1])
print("Accuracy:",test_acc,"\n")

# Perceptron Test Data
X = np.array( [[0,1], [1,0], [5,4], [1,1], [3,3], [2,4], [1,6]] )
Y = np.array( [[1], [1], [-1], [1], [-1], [-1], [-1]] )
W = p.perceptron_train(X,Y)
print("Perceptron Test Data 1    W1: ",W[0][0],"  W2: ",W[0][1],"  b:",W[1][0])
test_acc = p.perceptron_test(X,Y,W[0],W[1])
print("Accuracy:",test_acc,"\n")

# Perceptron Test Data - Writeup
X = np.array( [[-2,1], [1,1], [1.5,-0.5], [-2,-1], [-1,-1.5], [2,-2]] )
Y = np.array( [[1], [1], [1], [-1], [-1], [-1]] )
W = p.perceptron_train(X,Y)
print("Perceptron Test Data 2    W1: ",W[0][0],"  W2: ",W[0][1],"  b:",W[1][0])
コード例 #4
0
# --- Clustering checks ------------------------------------------------
print("Clustering")
# Sanity run on the test points.
centers_test = clu.K_Means(cluX1, 3)
# Runs reported in the writeup.
centers_k2 = clu.K_Means(cluX2, 2)
print("\tK=2", centers_k2)
centers_k3 = clu.K_Means(cluX2, 3)
print("\tK=3", centers_k3)
best_k2 = clu.K_Means_better(cluX2, 2)
print("\tBetter K=2", best_k2)
best_k3 = clu.K_Means_better(cluX2, 3)
print("\tBetter K=3", best_k3)
print()

# --- Perceptron checks ------------------------------------------------
print("Perceptron")
# Sanity run on the test points (result computed but not reported).
sanity_model = per.perceptron_train(perX1, perY1)
sanity_acc = per.perceptron_test(perX1, perY1, sanity_model[0], sanity_model[1])
# Run reported in the writeup: weights, bias, then self-accuracy.
writeup_model = per.perceptron_train(perX2, perY2)
writeup_acc = per.perceptron_test(perX2, perY2, writeup_model[0], writeup_model[1])
print("\tW", writeup_model[0])
print("\tB", writeup_model[1])
print("\tAccuracy", writeup_acc)
コード例 #5
0
def main():
    """Train and evaluate baseline, decision-tree, perceptron and logistic
    regression models on the adult dataset.

    Prints accuracy, precision, recall and F1 for each model on the test
    set; optional flags add pruning variants, a depth-tuning plot, top LR
    feature weights, and a final metrics bar plot.
    """
    args = parse_args()

    data = load_data('data/adult.data')
    test_data = load_data('data/adult.test2')
    val_data = load_data('data/adult.val')

    if args.depth_plot:
        # Only produce the depth/F1 tuning curve, then stop.
        print('Calculating f1-scores for different depths...')
        depths, scores = dt.tune_max_depth(data, val_data)
        plt.plot(depths, scores)
        plt.ylabel('F1-score')
        plt.xlabel('Maximum Depth')
        plt.show()
        quit()

    # Depth-1 tree on a forced attribute serves as the baseline.
    baseline_tree = dt.build_decision_tree(
        data, max_depth=1, forced_attribute=args.baseline_attribute)
    print('Building decision tree...')
    dt_start = time.time()
    if args.depth is not None:
        tree = dt.build_decision_tree(data, max_depth=args.depth)
    else:
        tree = dt.build_decision_tree(data)

    print('Decision tree built in ' + str(time.time() - dt_start) + ' s.')

    baseline_metrics = compute_metrics(dt.decision_tree_classify, test_data,
                                       [baseline_tree])
    dt_metrics = compute_metrics(dt.decision_tree_classify, test_data, [tree])

    # Initialise so the plot section below cannot hit an unbound name.
    dtre_metrics, dtcs_metrics = None, None
    if args.rep:
        print('Pruning decision tree (reduced error)...')
        dtre_start = time.time()
        dt.reduced_error_prune(tree, val_data)
        print('Decision tree pruned (reduced error) in ' +
              str(time.time() - dtre_start) + ' s.')
        dtre_metrics = compute_metrics(dt.decision_tree_classify, test_data,
                                       [tree])
    elif args.csp:
        print('Pruning decision tree (chi-square)...')
        dtcs_start = time.time()
        dt.chi_square_prune(tree)
        print('Decision tree pruned (chi-square) in ' +
              str(time.time() - dtcs_start) + ' s.')
        dtcs_metrics = compute_metrics(dt.decision_tree_classify, test_data,
                                       [tree])

    y_train = get_labels(data)
    y_test = get_labels(test_data)

    features = extract_features(data, test_data)
    X_train = features[0]
    X_test = features[1]
    feature_names = features[2]

    print('Building logistic regression model...')
    lr_start = time.time()
    lr_model = LogisticRegression(solver='sag').fit(X_train, y_train)
    print('Logistic regression model built in ' + str(time.time() - lr_start) +
          ' s.')

    if args.lr_top is not None:
        print('Top weighted features in logistic regression model: ' +
              str(get_lr_top_weights(lr_model, args.lr_top, feature_names)[0]))
    if args.lr_bot is not None:
        print(
            'Top negatively weighted features in logistic regression model: ' +
            str(get_lr_top_weights(lr_model, args.lr_bot, feature_names)[1]))

    lr_pred = lr_model.predict(X_test)

    weights = perceptron.perceptron(X_train, y_train, 10)
    perceptron_pred = perceptron.perceptron_test(X_test, weights)

    def _scores(pred):
        # Accuracy, precision, recall and F1 of *pred* against y_test.
        # Accuracy divides by len(test_data), matching the original code.
        correct = [y_test[i] == pred[i] for i in range(len(y_test))].count(True)
        return (correct / len(test_data), precision_score(y_test, pred),
                recall_score(y_test, pred), f1_score(y_test, pred))

    # Previously duplicated inline for each model.
    perceptron_metrics = _scores(perceptron_pred)
    lr_metrics = _scores(lr_pred)

    def _report(title, metrics):
        # Shared report format for every model's metric 4-tuple.
        print(title)
        print('Accuracy: ' + str(metrics[0]))
        print('Precision: ' + str(metrics[1]))
        print('Recall: ' + str(metrics[2]))
        print('F1 Score: ' + str(metrics[3]))

    _report('Baseline:', baseline_metrics)
    _report('\nDecision Tree:', dt_metrics)

    if args.rep:
        _report('\nDecision Tree (w/ reduced error pruning):', dtre_metrics)
    elif args.csp:
        _report('\nDecision Tree (w/ chi-square pruning):', dtcs_metrics)

    _report('\nPerceptron:', perceptron_metrics)
    _report('\nLogistic Regression:', lr_metrics)

    if args.plot:
        metrics_baseline = tuple(baseline_metrics[:4])
        metrics_dt = tuple(dt_metrics[:4])
        metrics_perceptron = tuple(perceptron_metrics[:4])
        metrics_lr = tuple(lr_metrics[:4])
        metrics_dtre, metrics_dtcs = None, None
        if args.rep:
            metrics_dtre = tuple(dtre_metrics[:4])
        elif args.csp:
            metrics_dtcs = tuple(dtcs_metrics[:4])
        plot_metrics(metrics_baseline, metrics_dt, metrics_perceptron,
                     metrics_lr, metrics_dtre, metrics_dtcs)
コード例 #6
0
###################################################################################################################################################
# filename: testPerceptron.py
# author: Sara Davis
# date: 10/10/2018
# version: 1.0
# description: exercises perceptron_train and perceptron_test from perceptron.py
#############################################################################################################################################

import numpy as np

from perceptron import perceptron_train
from perceptron import perceptron_test

# Training set used for the writeup example.
X = np.array([[-2, 1], [1, 1], [1.5, -.5], [-2, -1], [-1, -1.5], [2, -2]])
Y = np.array([[1], [1], [1], [-1], [-1], [-1]])
# Separate evaluation set fed to perceptron_test.
X_test = np.array([[0, 0], [1, 1], [0, 1], [2, 2], [1, 0], [1, 2]])
Y_test = np.array([[-1], [1], [-1], [1], [-1], [1]])

# Train, show the learned parameters, then report held-out accuracy.
model = perceptron_train(X, Y)
print(model)

accuracy = perceptron_test(X_test, Y_test, model[0], model[1])
print(accuracy)
コード例 #7
0
  Y = []
  for item in x_str:
    temp = [float(x) for x in item.split(',')]
    X.append(temp)
  if len(y_str)>0:
    for item in y_str:
      temp = int(item)
      Y.append(temp)
  X = np.array(X)
  Y = np.array(Y)
  return X, Y


# Train on data_1 and report accuracy on data_1 itself.
features, labels = load_data("data_1.txt")
weights, bias = p.perceptron_train(features, labels)
accuracy = p.perceptron_test(features, labels, weights, bias)
print("Perceptron:", accuracy)

# Train on data_2, then score the learned weights against data_1.
features, labels = load_data("data_2.txt")
weights, bias = p.perceptron_train(features, labels)
features, labels = load_data("data_1.txt")
accuracy = p.perceptron_test(features, labels, weights, bias)
print("Perceptron:", accuracy)

def df_test1(x):
    """Gradient of f(v) = v**2 evaluated at x[0], as a 1-element array."""
    slope = 2 * x[0]
    return np.array([slope])

# Minimise f(x) = x**2 starting from 5.0 with a 0.1 step size.
minimum = gd.gradient_descent(df_test1, np.array([5.0]), 0.1)
print("Gradient Descent:", minimum)
コード例 #8
0
def main():
    """Train and compare several classifiers on the adult income dataset.

    Models: a depth-1 decision tree (baseline), a full decision tree,
    logistic regression, a hand-rolled perceptron, scikit-learn's
    perceptron, and Gaussian naive Bayes.  Accuracy, precision, recall
    and F1 on the held-out set are printed for each.
    """
    data = load_data('data/adult.data')
    # Depth-1 tree serves as the comparison baseline.
    baseline_tree = dt.build_decision_tree(data, max_depth=1)
    print('Building decision tree...')
    dt_start = time.time()
    tree = dt.build_decision_tree(data)
    print('Decision tree built in ' + str(time.time() - dt_start) + ' s.')

    # NOTE(review): the adult.val file is used as the test set here — confirm
    # that is intentional.
    test_data = load_data('data/adult.val')
    baseline_metrics = compute_metrics(dt.decision_tree_classify, test_data, [baseline_tree])
    dt_metrics = compute_metrics(dt.decision_tree_classify, test_data, [tree])

    y_train = get_labels(data)
    y_test = get_labels(test_data)

    # extract_features builds the train and test design matrices together.
    features = extract_features(data, test_data)
    X_train = features[0]
    X_test = features[1]

    print('Building logistic regression model...')
    lr_start = time.time()
    lr_model = build_lr_model(X_train, y_train)
    print('Logistic regression model built in ' + str(time.time() - lr_start) + ' s.')

    lr_pred = lr_model.predict(X_test)

    #perceptron
    # Hand-rolled perceptron; the third argument (6) is presumably the
    # number of training epochs — confirm against perceptron.perceptron.
    weights = perceptron.perceptron(X_train, y_train, 6)
    perceptron_pred=perceptron.perceptron_test(X_test,weights)

    #skilearn model's perceptron
    perceptron_ski = build_perceptron_ski(X_train, y_train)
    y_percep_pred = perceptron_ski.predict(X_test)
    '''
    Result:
    Accuracy: 0.8032061912658928
    Precision: 0.5655369538587178
    Recall: 0.7202288091523661
    F1 Score: 0.6335773101555352
    '''

    # Gaussian Naive Bayes
    naive_bayes_model = build_naive_bayes(X_train, y_train)
    y_naive_bayes_pred = naive_bayes_model.predict(X_test)

    '''
    Result:
    Accuracy: 0.48473680977826916
    Precision: 0.3092619027626165
    Recall: 0.9576183047321893
    F1 Score: 0.4675341161536021
    '''


    # Accuracy below is the exact-match rate over the test set; precision,
    # recall and F1 come from scikit-learn's metric functions.
    print('Baseline:')
    print('Accuracy: ' + str(baseline_metrics[0]))
    print('Precision: ' + str(baseline_metrics[1]))
    print('Recall: ' + str(baseline_metrics[2]))
    print('F1 Score: ' + str(baseline_metrics[3]))

    print('\nDecision Tree:')
    print('Accuracy: ' + str(dt_metrics[0]))
    print('Precision: ' + str(dt_metrics[1]))
    print('Recall: ' + str(dt_metrics[2]))
    print('F1 Score: ' + str(dt_metrics[3]))

    print('\nLogistic Regression:')
    print('Accuracy: ' + str([y_test[i] == lr_pred[i] for i in range(len(y_test))].count(True) / len(test_data)))
    print('Precision: ' + str(precision_score(y_test, lr_pred)))
    print('Recall: ' + str(recall_score(y_test, lr_pred)))
    print('F1 Score: ' + str(f1_score(y_test, lr_pred)))

    print('\nPerceptron Regression:')
    print('Accuracy: ' + str([y_test[i] == perceptron_pred[i] for i in range(len(y_test))].count(True) / len(test_data)))
    print('Precision: ' + str(precision_score(y_test, perceptron_pred)))
    print('Recall: ' + str(recall_score(y_test, perceptron_pred)))
    print('F1 Score: ' + str(f1_score(y_test, perceptron_pred)))

    print('\nPerceptron Regression (ski):')
    print('Accuracy: ' + str([y_test[i] == y_percep_pred[i] for i in range(len(y_test))].count(True) / len(test_data)))
    print('Precision: ' + str(precision_score(y_test, y_percep_pred)))
    print('Recall: ' + str(recall_score(y_test, y_percep_pred)))
    print('F1 Score: ' + str(f1_score(y_test, y_percep_pred)))

    print('\nNaive Bayes (ski):')
    print('Accuracy: ' + str([y_test[i] == y_naive_bayes_pred[i] for i in range(len(y_test))].count(True) / len(test_data)))
    print('Precision: ' + str(precision_score(y_test, y_naive_bayes_pred)))
    print('Recall: ' + str(recall_score(y_test, y_naive_bayes_pred)))
    print('F1 Score: ' + str(f1_score(y_test, y_naive_bayes_pred)))

    print("\nCross Validation")
コード例 #9
0
# Evaluate the perceptron algorithm on the corresponding test examples for
# each version by reading the parameter vectors from the corresponding
# text files.

def _read_weight_vector(path):
    """Load one weight per line from *path* as an (n, 1) float64 column."""
    with open(path, 'r') as f:
        lines = f.readlines()
    return np.reshape(np.asarray(lines, dtype=np.float64), (-1, 1))

# The two loads previously duplicated the read/asarray/reshape sequence.
wp1 = _read_weight_vector('newsgroups_model_p1.txt')
wp2 = _read_weight_vector('newsgroups_model_p2.txt')

pred1 = perceptron_test(wp1, tsdata1)
pred2 = perceptron_test(wp2, tsdata2)

# Report test accuracy.

acc1 = np.mean(tslabels1 == pred1)
print('Accuracy of perceptron on test dataset version 1: %0.3f%%.' %
      (acc1 * 100))

acc2 = np.mean(tslabels2 == pred2)
print('Accuracy of perceptron on test dataset version 2: %0.3f%%.' %
      (acc2 * 100))

# Carry out same procedure for the average perceptron algorithm as that for the perceptron algorithm.

aw1, aerror1 = aperceptron_train(trdata1, trlabels1, 10000)
コード例 #10
0
ファイル: run.py プロジェクト: jkcn90/Machine_Learning_2014
 is_spam_list_training,
 vocabulary_list) = create_feature_vectors.run('./output_data/training_set')

# Part 3/4: Train on the training set and keep the last weight vector, then
# measure the error of that weight vector on training and validation sets.
print('\n=========================================================================================')
print('Problem 4:')
training_result = perceptron.perceptron_train(feature_vector_list_training,
                                              is_spam_list_training)
(weight_vector,
 total_number_of_misclassifications,
 number_of_runs) = training_result

validation_result = create_feature_vectors.run('./output_data/validation_set',
                                               vocabulary_list)
feature_vector_list_validation = validation_result[0]
is_spam_list_validation = validation_result[1]

training_set_error = perceptron.perceptron_test(
    weight_vector, feature_vector_list_training, is_spam_list_training)
validation_set_error = perceptron.perceptron_test(
    weight_vector, feature_vector_list_validation, is_spam_list_validation)

print('Total number of misclassifications: ' + str(total_number_of_misclassifications))
print('Training set error: ' + str(training_set_error))
print('Validation set error: ' + str(validation_set_error))

# Part 5: Find words in the vocabulary with the most positive and negative weights
print('\n=========================================================================================')
print('Problem 5:')
sorted_weight_index_least_to_greatest = sorted(
    range(len(weight_vector)), key=lambda idx: weight_vector[idx])
# Walk the 15 largest indices from greatest to least weight.
top_most_positive_weights = [vocabulary_list[index]
                             for index in sorted_weight_index_least_to_greatest[:-16:-1]]
コード例 #11
0
# Load the train/test splits; label vectors become (n, 1) columns.
XTrain = np.genfromtxt('XTrain.csv', delimiter=',')
yTrain = np.genfromtxt('yTrain.csv', delimiter=',').reshape(-1, 1)
XTest = np.genfromtxt('XTest.csv', delimiter=',')
yTest = np.genfromtxt('yTest.csv', delimiter=',').reshape(-1, 1)

# Feature count, then the number of training and test examples.
d = XTrain.shape[1]
n = XTrain.shape[0]
m = XTest.shape[0]

# Experiment 1: the original (linear) perceptron, 10 iterations from a
# zero weight vector.
w0 = np.zeros((d, 1))
w = perceptron.perceptron_train(w0, XTrain, yTrain, 10)
rate1 = perceptron.perceptron_test(w, XTest, yTest)
print(rate1)
# Observed error rate: 0.03833

#experiment 2, kernel perceptron
# Sweep candidate kernel bandwidths.
sigmaList = [0.01, 0.1, 1, 10, 100, 1000]

for sigma in sigmaList:

    # Mistake counter for this sigma.
    error_case = 0
    # Start from a zero coefficient vector (one entry per training point).
    a0 = np.zeros((n, 1))
    # NOTE(review): the constant 2 is presumably a kernel parameter/id —
    # confirm against kernel_perceptron_train's signature.
    a = perceptron.kernel_perceptron_train(a0, XTrain, yTrain, 2, sigma)

    # Predict each test example individually.
    for i in range(0, m):
        yHat = perceptron.kernel_perceptron_predict(a, XTrain, yTrain,
                                                    XTest[i, :], sigma)
コード例 #12
0
# Plot the per-epoch mistake counts recorded during training.
plt.xlabel('Epochs')
plt.ylabel("Errors")
plt.plot(error, 'bo-', label='Total errors during epoch')
plt.legend()
plt.savefig('perceptron_train.png')
plt.close()

# Test the perceptron algorithm on the test data by reading the parameter
# vector back from spam_model_p.txt (one weight per line).

with open('spam_model_p.txt', 'r') as f:
    w1 = f.readlines()

w1 = np.asarray(w1, dtype=np.float64)
w1 = np.reshape(w1, (-1, 1))

pred1 = perceptron_test(w1, data1)

# Report test accuracy.

acc1 = np.mean(labels1 == pred1)
print('\nAccuracy on test data: %0.2f%%.' % (acc1 * 100))

# Carry out same procedure for Average perceptron algorithm as done for the Vanilla perceptron algorithm.

print("\nAverage Perceptron algorithm: \n")

aw, aerror = aperceptron_train(data, labels, 50)

# BUG FIX: np.savetxt returns None, so the old `ap = np.savetxt(...)`
# assignment stored nothing; call it only for its file-writing side effect.
np.savetxt('spam_model_ap.txt', aw)

print("\nNumber of mistakes during each epoch:")