def main():
    """Cross-validate a perceptron on iris samples 50..149 and print the mean accuracy.

    Only the samples with targets 1 and 2 are used (rows 50..149); target 2
    is rewritten to -1 so the two classes are labelled 1 and -1. A fresh
    ``PerceptronClassifier(4, 0.1)`` is trained on every stratified fold and
    the fold accuracies are averaged and printed as a percentage.
    """
    # One constant drives both the splitter and the averaging divisor so the
    # two can never drift apart.
    n_splits = 10

    iris = datasets.load_iris()
    x = iris.data[50:150]
    t = iris.target[50:150]
    t[t == 2] = -1  # in-place relabel, same effect as the element-wise loop

    skf = model_selection.StratifiedKFold(n_splits=n_splits)
    avg_accuracy = 0.0
    for train, test in skf.split(x, t):
        classifier = PerceptronClassifier(4, 0.1)
        classifier.learn(t[train], x[train])

        hits = 0
        for test_x, test_t in zip(x[test], t[test]):
            if classifier.classify(test_x) == test_t:
                hits += 1
        # float() keeps true division on Python 2 as well.
        avg_accuracy += float(hits) / len(test)
    avg_accuracy /= n_splits

    # Formatted as a single string so the call is valid (and prints the same
    # text) under both the Python 2 print statement and the Python 3 function.
    print('average accuracy: %s %%' % (avg_accuracy * 100,))
def basic():
    """Fit a perceptron on a four-row toy set and print its score on a second toy set.

    The classifier object itself is printed afterwards.
    """
    print("-------------basic---------------")

    train_inputs = np.array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1]])
    train_targets = np.array([[0], [1], [1], [0]])
    eval_inputs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]])
    eval_targets = np.array([[0], [0], [1], [1]])

    pc = PerceptronClassifier(lr=1)
    # ``fit`` is chained in the original, so its return value is what gets scored.
    fitted = pc.fit(train_inputs, train_targets)
    print(fitted.score(eval_inputs, eval_targets))
    print(pc)
def __init__(self):
    """Train the wrapped perceptron on all 150 iris samples.

    Before training the targets are remapped in place: 0 becomes -1 and
    2 becomes 1 (target 1 is left untouched).
    """
    iris = datasets.load_iris()
    features = iris.data[0:150]
    targets = iris.target[0:150]

    # Same order as the original element-wise checks: 0 -> -1 first, then 2 -> 1.
    targets[targets == 0] = -1
    targets[targets == 2] = 1

    self.classifier = PerceptronClassifier(4, 0.1)
    self.classifier.learn(targets, features)
def test_01_from_file_02(self):
    """Round-trip a weighted classifier through ``save`` and ``from_file`` and compare its weights."""
    model_path = 'data/ex05_from_file_test.model'

    PerceptronClassifier({'good': 1, 'terrible': -1}).save(model_path)
    classifier = PerceptronClassifier.from_file(model_path)

    # Fail with a readable message instead of an AttributeError below.
    if classifier is None:
        self.fail(
            msg="Constructing classifier from file failed: from_file returned None")

    # A separate literal is used so the expectation cannot alias the
    # dictionary handed to the constructor.
    self.assertEqual(classifier.weights, {'good': 1, 'terrible': -1})
def separable():
    """Train on ``separableIsSquare.arff`` at ten learning rates and plot the last fit.

    Learning rates run from 1.0 down to 0.1 in steps of 0.1. For each one a
    new classifier is fitted without shuffling, and its accuracy on the
    training data and number of epochs are printed. The weights of the last
    classifier (learning rate 0.1) define the line drawn on the scatter plot,
    which is written to ``separable.svg``.
    """
    print("----------------separable-----------------------")
    mat = Arff("./separableIsSquare.arff", label_count=1)
    data = mat[:, :-1]
    labels = mat[:, -1].reshape(-1, 1)
    print(data[:, 1])
    print(labels)

    ### Make the Classifier #####
    P3Class = None
    for lr in range(10, 0, -1):
        rate = 0.1 * lr  # computed once, used for both the model and the report
        P3Class = PerceptronClassifier(lr=rate, shuffle=False)
        P3Class.fit(data, labels, standard_weight_value=None)
        Accuracy = P3Class.score(data, labels)
        print("Learning Rate = ", rate)
        print("Accuracy = [{:.2f}]".format(Accuracy))
        print("Epochs = ", P3Class.get_epochs_trained())

    w = P3Class.get_weights()

    def y(x):
        """Line derived from the final weights; undefined when ``w[1]`` is 0."""
        return (-w[0]/w[1])*x - (w[2]/w[1])

    grapher = Grapher()
    grapher.graph(data[:, 0], data[:, 1], labels=mat[:, -1], title="Separable")
    grapher.add_function(y)
    grapher.show("separable.svg")
def inseparable():
    """Train on ``impossible.arff`` at ten learning rates and plot the last fit.

    Learning rates run from 1.0 down to 0.1 in steps of 0.1. Each classifier
    is created with ``deterministic=10`` and ``shuffle=False``; its accuracy
    on the training data and number of epochs are printed. The weights of the
    last classifier (learning rate 0.1) define the line drawn on the scatter
    plot, which is written to ``Inseparable.svg``.
    """
    print("----------------Inseparable-----------------------")
    mat = Arff("./impossible.arff", label_count=1)
    data = mat[:, :-1]
    labels = mat[:, -1].reshape(-1, 1)

    ### Make the Classifier #####
    P4Class = None
    for lr in range(10, 0, -1):
        rate = 0.1 * lr  # computed once, used for both the model and the report
        P4Class = PerceptronClassifier(lr=rate, deterministic=10, shuffle=False)
        P4Class.fit(data, labels, standard_weight_value=None)
        Accuracy = P4Class.score(data, labels)
        print("Learning Rate = ", rate)
        print("Accuracy = [{:.2f}]".format(Accuracy))
        print("Epochs = ", P4Class.get_epochs_trained())

    w = P4Class.get_weights()

    def y(x):
        """Line derived from the final weights; undefined when ``w[1]`` is 0."""
        return (-w[0]/w[1])*x - (w[2]/w[1])

    grapher = Grapher()
    grapher.graph(data[:, 0], data[:, 1], labels=mat[:, -1], title="Inseparable")
    grapher.add_function(y)
    grapher.show("Inseparable.svg")
def runMahCode(arff, shuffle=True, determ=0, training=False, lr=.1, quiet=False):
    """Fit a perceptron on an ARFF file and report or return its accuracy.

    Args:
        arff: path handed to ``Arff``; the last column is used as the label.
        shuffle: forwarded to ``PerceptronClassifier``.
        determ: forwarded to ``PerceptronClassifier`` as ``deterministic``.
        training: when true, fit on the training part produced by
            ``PerceptronClassifier.split_training`` and score on the test
            part; otherwise fit and score on the full data.
        lr: learning rate forwarded to ``PerceptronClassifier``.
        quiet: when true, return the accuracy instead of printing.

    Returns:
        The accuracy when ``quiet`` is true, otherwise ``None`` (the accuracy
        and final weights are printed instead).
    """
    mat = Arff(arff, label_count=1)
    features = mat.data[:, 0:-1]
    targets = mat.data[:, -1:]
    model = PerceptronClassifier(lr=lr, shuffle=shuffle, deterministic=determ)

    if training:
        fit_X, fit_y, eval_X, eval_y = PerceptronClassifier.split_training(features, targets)
    else:
        fit_X, fit_y, eval_X, eval_y = features, targets, features, targets

    model.fit(fit_X, fit_y)
    score = model.score(eval_X, eval_y)

    if quiet:
        return score

    print("Accuracy = [{:.5f}]".format(score))
    print("Final Weights =", model.get_weights())
def testing(num):
    """Train a perceptron on ``num`` digit samples and report on the test and validation sets.

    All three data sets are loaded from ``data/digitdata`` before training.
    The classifier is trained with a final argument of 10 to ``train``, then
    its guesses for the test and validation images are passed to
    ``samples.verify`` under a printed heading.

    Args:
        num: number of training images and labels requested from the loaders.
    """
    trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28)
    trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num)
    testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000)
    validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28)
    validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000)

    perceptron = PerceptronClassifier(trainData, trainLabels, 0)
    perceptron.train(trainData, trainLabels, 10)

    # print(...) with a single string argument behaves the same under the
    # Python 2 statement and the Python 3 function.
    reports = (
        ("Test Data", testData, testLabels),
        ("Validation Data", validData, validLabels),
    )
    for heading, images, labels in reports:
        print("===================================")
        print(heading)
        guess = perceptron.classify(images)
        samples.verify(perceptron, guess, labels)
def testing(num):
    """Train a perceptron on ``num`` face samples and report on the test and validation sets.

    All three data sets are loaded from ``data/facedata`` before training.
    The classifier is trained with a final argument of 10 to ``train``, then
    its guesses for the test and validation images are passed to
    ``samples.verify`` under a printed heading.

    Args:
        num: number of training images and labels requested from the loaders.
    """
    trainData = samples.loadImagesFile("data/facedata/facedatatrain", num, 60, 70)
    trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num)
    # NOTE(review): 150 test images are requested but 151 test labels; kept
    # as in the original - confirm which count is intended.
    testData = samples.loadImagesFile("data/facedata/facedatatest", 150, 60, 70)
    testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151)
    validData = samples.loadImagesFile("data/facedata/facedatavalidation", 301, 60, 70)
    validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301)

    perceptron = PerceptronClassifier(trainData, trainLabels, 0)
    perceptron.train(trainData, trainLabels, 10)

    # print(...) with a single string argument behaves the same under the
    # Python 2 statement and the Python 3 function.
    reports = (
        ("Test Data", testData, testLabels),
        ("Validation Data", validData, validLabels),
    )
    for heading, images, labels in reports:
        print("===================================")
        print(heading)
        guess = perceptron.classify(images)
        samples.verify(perceptron, guess, labels)
def test_02_from_dataset_02(self):
    """Check that ``from_dataset`` builds a classifier with zero weights for the given dataset."""
    classifier = PerceptronClassifier.from_dataset(self.small_dataset_train_2)

    # Fail with a readable message instead of an AttributeError below.
    if classifier is None:
        self.fail(
            msg='Constructing classifier for dataset failed: from_dataset returned None')

    zero_weights = dict.fromkeys(('highly', 'boring', 'green', 'eggs'), 0)
    self.assertEqual(classifier.weights, zero_weights)
def test_04_update_02(self):
    """Check ``update`` on the second "do update" and "no update" fixtures.

    The first fixture must change the weights to the expected values; for the
    second fixture ``update`` must return ``False``.
    """
    updated = PerceptronClassifier({'highly': 1, 'boring': -1})
    updated.update(self.small_instance_list_do_update[1])
    self.assertEqual(updated.weights, {'highly': 1, 'boring': 0})

    # A fresh classifier so the first update cannot influence this check.
    untouched = PerceptronClassifier({'highly': 1, 'boring': -1})
    was_updated = untouched.update(self.small_instance_list_no_update[1])
    self.assertEqual(False, was_updated)
class Perceptron:
    """Iris wrapper around ``PerceptronClassifier`` that only recognises 'setosa'."""

    def __init__(self):
        """Train the wrapped classifier on all 150 iris samples.

        Targets are remapped in place first: 0 becomes -1 and 2 becomes 1.
        """
        iris = datasets.load_iris()
        features = iris.data[0:150]
        targets = iris.target[0:150]

        # Same order as the original element-wise checks: 0 -> -1 first, then 2 -> 1.
        targets[targets == 0] = -1
        targets[targets == 2] = 1

        self.classifier = PerceptronClassifier(4, 0.1)
        self.classifier.learn(targets, features)

    def predict(self, x):
        """Return ``'setosa'`` when the classifier outputs -1 for ``x``, otherwise ``None``."""
        if self.classifier.classify(x) == -1:
            return 'setosa'
        return None
def our_avg_run(avg_num_of_run, filename):
    """Average perceptron error over repeated random splits of one dataset.

    Every run re-splits the dataset, trains a new classifier on the training
    part and measures its error on that training part and on the evaluation
    set. The evaluation set is the complete dataset: the second part returned
    by ``split_dataset`` is discarded.

    Args:
        avg_num_of_run: number of split/train/evaluate repetitions.
        filename: passed to ``load_dataset``.

    Returns:
        ``(mean evaluation error, mean training error)`` over all runs.
    """
    dataset = load_dataset(filename)
    train_errors = []
    eval_errors = []

    for _ in range(avg_num_of_run):
        training_set, _discarded = split_dataset(dataset, PROBABILITY_TRAINING_SET)
        # The original overwrites the split's test part with the full dataset.
        testing_set = dataset
        if IS_VERBOSE:
            print("training set size: %s testing set size: %s num instances: %s" % (len(training_set), len(testing_set), len(dataset)))

        train_x, train_y = split_attribute_and_label(training_set)
        test_x, test_y = split_attribute_and_label(testing_set)

        p = PerceptronClassifier(ETA, THRESHOLD, UPPER_BOUND, False)
        p.fit(train_x, train_y)

        train_errors.append(calculate_error(train_y, p.predict(train_x)))
        eval_errors.append(calculate_error(test_y, p.predict(test_x)))

    # NOTE(review): weights of the last run are printed once, after the loop;
    # confirm against the original layout.
    print(p.weights)

    mean_eval_error = sum(eval_errors) / len(eval_errors)
    mean_train_error = sum(train_errors) / len(train_errors)
    return mean_eval_error, mean_train_error
def test_04_update_01(self):
    """Check ``update`` on the first "do update" and "no update" fixtures.

    The first fixture must change the weights to the expected values; for the
    second fixture ``update`` must return ``False``.
    """
    updated = PerceptronClassifier({'highly': 1, 'boring': -1})
    # Test document: ("highly", "doc25", -1)
    updated.update(self.small_instance_list_do_update[0])
    self.assertEqual(updated.weights, {'highly': 0, 'boring': -1})

    # A fresh classifier so the first update cannot influence this check.
    untouched = PerceptronClassifier({'highly': 1, 'boring': -1})
    # Test document: ("boring", "doc26", 1)
    was_updated = untouched.update(self.small_instance_list_no_update[0])
    self.assertEqual(False, was_updated)
def nltk_movie_review_accuracy(num_iterations):
    """Try different numbers of features and optimize the number of training iterations.

    Exercise scaffold: as written the function returns ``(0, 0)`` immediately,
    so everything after the first ``return`` is unreachable until that line
    is removed (see the TODO markers).

    The code below the placeholder builds a training and a development set for
    100, 1000 and 10000 features, keeps the classifier with the best
    development accuracy, prints its top features, and scores it on the test
    set.

    Args:
        num_iterations: not referenced by the visible code; presumably meant
            for the training step that the exercise asks to add.

    Returns:
        ``(best_development_accuracy, testing_accuracy)`` once the placeholder
        is removed; ``(0, 0)`` until then.
    """
    return 0, 0  # TODO: Exercise 4: remove line
    (training_documents, dev_documents, test_documents) = load_reviews()
    best_development_accuracy = 0.0
    best_num_features = 0
    best_classifier = None
    best_feature_set = None
    # Test different numbers of features.
    for n in [100, 1000, 10000]:
        print("Training with %d features..." % n)
        # Training set
        training_set = Dataset.from_document_collection(training_documents,
                                                        num_features=n)
        # Development set (reuses the feature set selected on the training data)
        development_set = Dataset.from_document_collection(
            dev_documents, feature_set=training_set.feature_set)
        # Train classifier
        classifier = PerceptronClassifier.from_dataset(training_set)
        pass  # TODO: Exercise 4: train the classifier
        # Accuracies of classifier with n features
        train_accuracy = classifier.test_accuracy(training_set)
        development_accuracy = classifier.test_accuracy(development_set)
        # NOTE(review): the original indentation was lost; the report line is
        # assumed to belong to the "new best" branch - confirm.
        if development_accuracy > best_development_accuracy:
            best_development_accuracy = development_accuracy
            best_num_features = n
            # Copy so later changes to ``classifier`` cannot affect the best one.
            best_classifier = classifier.copy()
            best_feature_set = training_set.feature_set
            print(
                "Best classifier with %d features: \t Train Accuracy: %.4f \t Dev Accuracy: %.4f"
                % (n, train_accuracy, best_development_accuracy))
    print("Best number of features: %d " % best_num_features)
    print("Top features for positive class:")
    print(best_classifier.features_for_class(True))
    print("Top features for negative class:")
    print(best_classifier.features_for_class(False))
    # Compute test score for best setting.
    testing_set = Dataset.from_document_collection(
        test_documents, feature_set=best_feature_set)
    testing_accuracy = best_classifier.test_accuracy(testing_set)
    print("Test score for best setting: %.4f" % testing_accuracy)
    return best_development_accuracy, testing_accuracy
def debug():
    """Fit the perceptron on the ``linsep2nonorigin`` debug set and print accuracy and weights.

    The classifier is created with ``lr=0.1``, ``shuffle=False``,
    ``deterministic=10`` and ``printIt=False``; accuracy is measured on the
    data it was trained on.
    """
    print("------------arff-------------------")
    mat = Arff("../data/perceptron/debug/linsep2nonorigin.arff", label_count=1)
    features = mat.data[:, 0:-1]
    targets = mat.data[:, -1].reshape(-1, 1)

    settings = dict(lr=0.1, shuffle=False, deterministic=10, printIt=False)
    model = PerceptronClassifier(**settings)
    model.fit(features, targets)

    score = model.score(features, targets)
    print("Accuray = [{:.2f}]".format(score))
    print("Final Weights =", model.get_weights())
def voting():
    """Run five shuffled train/test rounds on ``vote.arff`` and plot a per-round score figure.

    Each round splits the data with ``_shuffle_split(..., .3)``, fits a
    shuffling perceptron with learning rate 0.1, prints training and test
    accuracy, the weights (also sorted by absolute value) and the
    ``epochScore`` trace. The value collected per round is
    ``(scores[-2][0] - scores[0][0]) / len(scores)``; the collected values are
    plotted to ``AverageScores.svg``.
    """
    print("--------------voting---------------------")
    mat = Arff("../data/perceptron/vote.arff", label_count=1)
    np_mat = mat.data
    avg = []

    for iteration in range(5):
        print("xxxxxxxxxxx " + str(iteration) + " xxxxxxxx")
        training, testing = _shuffle_split(np_mat, .3)

        train_X = training[:, :-1]
        train_y = training[:, -1].reshape(-1, 1)
        P5Class = PerceptronClassifier(lr=0.1, shuffle=True)
        P5Class.fit(train_X, train_y)
        print("Accuracy = [{:.2f}]".format(P5Class.score(train_X, train_y)))
        print("Epochs = ", P5Class.get_epochs_trained())

        test_X = testing[:, :-1]
        test_y = testing[:, -1].reshape(-1, 1)
        print("Test Accuracy = [{:.2f}]".format(P5Class.score(test_X, test_y)))

        weights = P5Class.get_weights()
        print(weights)
        # Pair every weight with its position, largest magnitude first.
        indexed = [(weight, position) for position, weight in enumerate(weights)]
        sort_weights = sorted(indexed, key=lambda pair: abs(pair[0]), reverse=True)
        print("sorted:\r\n", sort_weights)

        scores = P5Class.getTrace().getColumns("epochScore")
        print('scores', scores)
        first_score = float(scores[0][0])
        second_to_last_score = float(scores[-2][0])
        avg.append((second_to_last_score - first_score) / len(scores))

    # NOTE(review): the summary print and the plot are assumed to run once,
    # after all rounds - confirm against the original layout.
    print('avg', avg)
    rounds = list(range(len(avg)))
    grapher = Grapher()
    grapher.graph(rounds, avg, labels=[1]*len(avg), points=False,
                  title="Average Scores", xlabel="Iteration", ylabel="score")
    grapher.show("AverageScores.svg")
def evaluation():
    """Fit the perceptron on the banknote-authentication set and print accuracy and weights.

    The classifier is created with ``lr=0.1``, ``shuffle=False`` and
    ``deterministic=10``; accuracy is measured on the data it was trained on.
    """
    print("--------------arf2------------------------------")
    mat = Arff("../data/perceptron/evaluation/data_banknote_authentication.arff", label_count=1)
    # Last column is the label; everything before it is a feature.
    data = mat[:, :-1]
    labels = mat[:, -1].reshape(-1, 1)

    #### Make Classifier ####
    P2Class = PerceptronClassifier(lr=0.1, shuffle=False, deterministic=10)
    P2Class.fit(data, labels)
    Accuracy = P2Class.score(data, labels)
    print("Accuray = [{:.2f}]".format(Accuracy))
    print("Final Weights =", P2Class.get_weights())
def testing(num):
    """Train a perceptron on ``num`` digit samples and report on the test and validation sets.

    All three data sets are loaded from ``data/digitdata`` before training.
    The classifier is trained with a final argument of 10 to ``train``, then
    its guesses for the test and validation images are passed to
    ``samples.verify`` under a printed banner.

    Args:
        num: number of training images and labels requested from the loaders.
    """
    trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28)
    trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num)
    testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000)
    validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28)
    validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000)

    perceptron = PerceptronClassifier(trainData, trainLabels, 0)
    perceptron.train(trainData, trainLabels, 10)

    # print(...) with a single string argument behaves the same under the
    # Python 2 statement and the Python 3 function.
    reports = (
        ("*************Test Data*************", testData, testLabels),
        ("************Valid Data*************", validData, validLabels),
    )
    for banner, images, labels in reports:
        print("***********************************")
        print(banner)
        guess = perceptron.classify(images)
        samples.verify(perceptron, guess, labels)
def testing(num):
    """Train a perceptron on ``num`` face samples and report on the test and validation sets.

    All three data sets are loaded from ``data/facedata`` before training.
    The classifier is trained with a final argument of 10 to ``train``, then
    its guesses for the test and validation images are passed to
    ``samples.verify`` under a printed heading.

    Args:
        num: number of training images and labels requested from the loaders.
    """
    trainData = samples.loadImagesFile("data/facedata/facedatatrain", num, 60, 70)
    trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num)
    # NOTE(review): 150 test images are requested but 151 test labels; kept
    # as in the original - confirm which count is intended.
    testData = samples.loadImagesFile("data/facedata/facedatatest", 150, 60, 70)
    testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151)
    validData = samples.loadImagesFile("data/facedata/facedatavalidation", 301, 60, 70)
    validLabels = samples.loadLabelsFile(
        "data/facedata/facedatavalidationlabels", 301)

    perceptron = PerceptronClassifier(trainData, trainLabels, 0)
    perceptron.train(trainData, trainLabels, 10)

    # print(...) with a single string argument behaves the same under the
    # Python 2 statement and the Python 3 function.
    reports = (
        ("Test Data", testData, testLabels),
        ("Validation Data", validData, validLabels),
    )
    for heading, images, labels in reports:
        print("===================================")
        print(heading)
        guess = perceptron.classify(images)
        samples.verify(perceptron, guess, labels)
from perceptron import PerceptronClassifier
from arff import Arff
import numpy as np

# Script: fit a perceptron on the banknote-authentication data and report
# its accuracy on that same data together with the learned weights.
mat = Arff(
    "../data/perceptron/evaluation/data_banknote_authentication.arff",
    label_count=1,
)

# The last column holds the label; it is kept two-dimensional via the slice.
data, labels = mat.data[:, 0:-1], mat.data[:, -1:]

PClass = PerceptronClassifier(lr=0.1, shuffle=False, deterministic=10)
PClass.fit(data, labels)

Accuracy = PClass.score(data, labels)
print("Accuray = [{:.5f}]".format(Accuracy))
print("Final Weights =", PClass.get_weights())
DIR: 'data/digitdata', HEIGHT: 20, WIDTH: 29, LABEL: 10, PIXELS: None } } samples = Samples(map.get(inp).get(DIR)) dataClassifier = DataClassifier( map.get(inp).get(HEIGHT), map.get(inp).get(WIDTH), map.get(inp).get(LABEL), map.get(inp).get(PIXELS)) perceptronClassifier = PerceptronClassifier(dataClassifier.FEATURES, dataClassifier.LABELS) samples.readFiles() dataset = 0 featureValueListForAllTrainingImages, actualLabelForTrainingList = dataClassifier.extractFeatures( samples.train_lines_itr, samples.train_labelsLines_itr) TOTALDATASET = len(featureValueListForAllTrainingImages) INCREMENTS = int(TOTALDATASET * PERCENT_INCREMENT / 100) PERCEPTRON_TIME = {} while dataset < TOTALDATASET: startTimer = time.time() print("Training ON {0} to {1} data".format(dataset,
plt.xlabel("X: pH of Coffee") plt.show(); # add the plot later # # 3. Train on both sets # Linearly Separable: # In[9]: data = linSep[:,:-1] labels = linSep[:,-1:] for lr in np.linspace(.01, 1, 10): PClass = PerceptronClassifier(lr=lr,shuffle=True,deterministic=0) PClass.fit(data,labels) Accuracy = PClass.score(data,labels) print(Accuracy) # Because everything is centered very close to zero, it doesn't take very long for the learning rate to differentiate the two sets. # # We also see that the learning rate doesn't change much. # In[10]: data = nonLin[:,:-1] labels = nonLin[:,-1:] for lr in np.linspace(.01, 1, 10):
import matplotlib.pyplot as plt
from utils import DatasetGenerator
from perceptron import PerceptronClassifier
import numpy as np

# Script: generate separable 2-D data, fit a perceptron with the matching
# discriminant function and draw the zero level of its score over the data.
discr_func = 'ellipse'  # choose one of ['linear', 'ellipse', 'quadratic']

data_generator = DatasetGenerator()
X, y = data_generator.generate_separable_data(num_points=57,
                                              discr_func=discr_func)

clf = PerceptronClassifier(discr_func=discr_func)
clf.fit(X, y)

# Fixed viewport; the limits are read back below to size the evaluation grid.
plt.xlim(-14, 14)
plt.ylim(-14, 14)

# plot data points
plt.scatter(X[:, 0], X[:, 1], s=25, c=y)

# plot separating curve
xmin, xmax = plt.xlim()
ymin, ymax = plt.ylim()
xlist = np.linspace(xmin, xmax, 100)
ylist = np.linspace(ymin, ymax, 100)
XX, YY = np.meshgrid(xlist, ylist)

# One (x, y) row per grid node, scored and folded back into the grid shape.
xy = np.vstack([XX.ravel(), YY.ravel()]).T
grid_scores = clf.score(xy)
Z = grid_scores.reshape(XX.shape)

plt.contour(XX, YY, Z, levels=[0], colors=['r'])
plt.show()
import statistics import time import numpy as np import p3_utils from load_data import FaceData from perceptron import PerceptronClassifier if __name__ == '__main__': iterations = 3 legalLabels = range(2) face_data = FaceData("facedata") classifier = PerceptronClassifier(legalLabels, iterations) featureFunction = face_data.basic_feature_extractor trainingDataList = list(map(featureFunction, face_data.face_train_imgs)) validationDataList = list( map(featureFunction, face_data.face_validation_imgs)) testDataList = list(map(featureFunction, face_data.face_test_imgs)) n_train = len(trainingDataList) classifier.set_weights(range(2), FaceData.FACE_DATUM_WIDTH, FaceData.FACE_DATUM_HEIGHT) # Conduct training and testing percentages, runtimes = ([], []) for n, n_samples in enumerate( range(n_train // 10, n_train + 1, n_train // 10)): start_time = time.time()
def test_03_prediction_02(self):
    """Check that every instance of the second prediction fixture is predicted as its own label."""
    classifier = PerceptronClassifier({'highly': 1, 'boring': -2})
    instances = self.small_dataset_pred_test_2.instance_list

    for instance in instances:
        predicted = classifier.prediction(instance.feature_counts)
        self.assertEqual(predicted, instance.label)
from perceptron import PerceptronClassifier
from arff import Arff
import numpy as np

# Script: fit a perceptron on the standardised voting data and report its
# accuracy on that same data together with the learned weights.
mat = Arff("standardVoting.arff", label_count=1)

# The last column holds the label; it is kept two-dimensional via the slice.
data, labels = mat.data[:, 0:-1], mat.data[:, -1:]

PClass = PerceptronClassifier(lr=0.1)
PClass.fit(data, labels)

Accuracy = PClass.score(data, labels)
print("Accuray = [{:.5f}]".format(Accuracy))
print("Final Weights =", PClass.get_weights())
def __init__(self, legalLabels, maxIterations):
    """Run the ``PerceptronClassifier`` initialiser, then set ``weights`` to an empty counter.

    Args:
        legalLabels: forwarded unchanged to ``PerceptronClassifier.__init__``.
        maxIterations: forwarded unchanged to ``PerceptronClassifier.__init__``.
    """
    # Explicit base-class call; the enclosing class is not visible here, so
    # this is left as written rather than converted to super().
    PerceptronClassifier.__init__(self, legalLabels, maxIterations)
    # Assigned after the base initialiser, so this replaces any ``weights``
    # value that initialiser may have set.
    self.weights = util.Counter()
# its settings if __name__ == "__main__": # mat = Arff("linsep2nonorigin.arff", label_count=1) # mat = Arff("data_banknote_authentication.arff", label_count=1) mat = Arff("votingMissingValuesReplaced.arff", label_count=1) # mat = Arff("test2.arff", label_count=1) data = mat.data[:, 0:-1] labels = mat.data[:, -1].reshape(-1, 1) X_train, y_train, X_eval, y_eval = split(data, labels) # split data in 70/30 # PClass = PerceptronClassifier(lr=0.1, shuffle=False, deterministic=10) #initialize perceptron with settings # PClass = PerceptronClassifier(lr=0.1, shuffle=False) PClass = PerceptronClassifier(lr=0.1, shuffle=True) row, col = data.shape # using all zeros initial weight, if not provided, it will do random initial_weight = np.zeros((col + 1, 1)) PClass.fit(X_train, y_train, initial_weight) # train the perceptron # PClass.fit(data, labels, initial_weight) # graph(data[:,0].reshape(-1,1),data[:,1].reshape(-1,1),labels, "linearly separable") # plotting scatter plot # index = list(range(1, PClass.num_epoch + 1)) # plotting scatter plot with each epoch's error during training # plt.scatter(index, PClass.epoch_error) # plt.plot(index, PClass.epoch_error) # plt.show() # y = lambda x: -1.3333* x # plotting the line # y = lambda x: 0.8181 * x # graph_function(y)
import sys
sys.path.append('../')
from tools import arff
from perceptron import PerceptronClassifier

# Script: split one ARFF data set with the classifier's own
# ``_train_test_split`` (third argument 70), fit on the training part and
# print both accuracies, the final weights and the per-epoch
# misclassification record.
#
# Other data sets that can be swapped in:
#   "data_banknote_authentication.arff", "dataset_1.arff", "dataset_2.arff"
arff_file = "voting_data.arff"

mat = arff.Arff(arff_file)
np_mat = mat.data
data = mat[:, :-1]
labels = mat[:, -1].reshape(-1, 1)

#### Make Classifier ####
P2Class = PerceptronClassifier(None, lr=0.1, shuffle=True)
X_train, y_train, X_test, y_test = P2Class._train_test_split(data, labels, 70)
P2Class.fit(X_train, y_train)

# Captured right after fitting, before any scoring call.
misclassifications = P2Class.misclassifications

train_accuracy = P2Class.score(X_train, y_train)
print('Train split accuracy: {}'.format(train_accuracy))
test_accuracy = P2Class.score(X_test, y_test)
print('Test split accuracy: {}'.format(test_accuracy))
print("Final Weights =", P2Class.get_weights())

# Epochs are reported 1-based.
for epoch in range(1, len(misclassifications) + 1):
    print('epoch: {}, {}'.format(epoch, misclassifications[epoch - 1]))
elif y[i, 0] == 2: if out3[i, 0] == 1 and out1[i, 0] == 0 and out2[i, 0] == 0: correct += 1 elif out3[i, 0] == 1 and net3[i, 0] > net1[i, 0] and net3[ i, 0] > net2[i, 0]: correct += 1 return correct / row if __name__ == "__main__": mat = Arff("iris.arff", label_count=1) data = mat.data[:, 0:-1] labels = mat.data[:, -1].reshape(-1, 1) X_train, y_train, X_eval, y_eval = split(data, labels) P1 = PerceptronClassifier(lr=0.1, shuffle=True) # train each perceptron y = process_label(y_train, 0) row, col = X_train.shape initial_weight = np.zeros((col + 1, 1)) P1.fit(X_train, y, initial_weight) print("P1 training accuracy = ", P1.score(X_train, y)) P2 = PerceptronClassifier(lr=0.1, shuffle=True) y = process_label(y_train, 1) row, col = X_train.shape initial_weight = np.zeros((col + 1, 1)) P2.fit(X_train, y, initial_weight) print("P2 training accuracy = ", P2.score(X_train, y)) P3 = PerceptronClassifier(lr=0.1, shuffle=True) y = process_label(y_train, 2)