def perceptron_accuracy(weights, ap=False):
    right = 0
    for i in range(0, 208):
        if not ap:
            v = p.predict(data[i], weights=weights)
        else:
            v = p.predict(data[i], weights=weights, ap=True)
        """
        if not ap:
            if v >= .5:
                v = 1
            if v < .5:
                v = 0
        else:
            if v < .5:
                v = 1
            if v >= .5:
                v = 0
        """
        if not ap:
            if v == data[i][-1]:
                right += 1
        else:
            if v != data[i][-1]:
                right += 1
    accuracy = (right / 208) * 100
    return accuracy
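# Note on the ap branch above: the antiperceptron is trained to output the
# opposite class, so its prediction counts as correct exactly when it
# disagrees with the stored label -- hence the inverted comparison.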
def test_predict():
    xx = np.array([[3, 1], [1, 3]])
    w = np.array([3, -3])
    b = 0
    predictions = perceptron.predict(xx, w, b)
    assert predictions[0] == 1
    assert predictions[1] == 0
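# For context, a minimal predict() consistent with test_predict above -- a
# sketch assuming a plain linear threshold (w . x + b >= 0 -> 1, else 0),
# not necessarily the module's actual implementation:
import numpy as np

def predict_sketch(X, w, b):
    # Row-wise dot product with the weight vector, plus bias, thresholded at 0.
    return (np.asarray(X) @ w + b >= 0).astype(int)

# predict_sketch([[3, 1], [1, 3]], np.array([3, -3]), 0) -> array([1, 0])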
def test_train():
    w, b = np.array([.0, .0]), .0
    w, b, costs = trainer.train(allx(), ally(), w, b, 1000)
    assert almost_eq(costs[-1], 0.2)
    predictions = perceptron.predict(np.array([[3, 1], [1, 3]]), w, b)
    assert predictions[0] == 1
    assert predictions[1] == 0
def accuracy(data, weights, K):
    correct = 0
    for i in range(0, len(data)):
        label = data[i]['label']
        prediction = predict(data, weights, i, K)
        if int(label) == prediction:
            correct += 1
    return float(correct) / len(data)
def evaluate_circuit(n=208, eval_perceptron=True):
    test_idxs = np.random.choice(data.shape[0], n, replace=False)
    correct_circuit = 0
    accuracy_circuit = 0.0
    correct_perceptron = 0
    accuracy_perceptron = 0.0
    for i in test_idxs:
        trial_result = trial(weights, p_weights, ap_weights, i, plot=False, num=1)
        if trial_result[0] == data[i][-1]:
            correct_circuit += 1
        if eval_perceptron:
            # This uses multiple noisy training rows over n epochs; should we
            # instead train the perceptron on a single noisy row n times?
            perceptron_training_data = data[test_idxs, :-1]
            perceptron_noisy_data = perceptron_training_data + np.random.rand(n, 60)
            classes = data[test_idxs, -1].reshape(n, 1)
            perceptron_noisy_data = np.append(perceptron_noisy_data, classes, axis=1)
            # Train the perceptron on noisy data for as many epochs as it took
            # the circuit to decide, then score it on the clean row.
            perceptron = p.gradient_descent(perceptron_noisy_data, 0.1, trial_result[1])
            if p.predict(data[i], perceptron) == data[i][-1]:
                correct_perceptron += 1
    accuracy_circuit = (correct_circuit / n) * 100
    accuracy_perceptron = (correct_perceptron / n) * 100
    print("circuit accuracy: ", accuracy_circuit)
    print("perceptron accuracy:", accuracy_perceptron)
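# The question in the comment above could be explored with a variant that
# jitters the same row once per epoch instead of reusing a fixed noisy batch.
# A sketch, assuming the same p.gradient_descent(data, lr, epochs) interface;
# single_row_noisy_data is a hypothetical helper, not part of the codebase:
import numpy as np

def single_row_noisy_data(row, epochs, noise_scale=1.0):
    # Stack `epochs` independently jittered copies of one row's features,
    # then re-attach the (unchanged) label column.
    features = np.tile(row[:-1], (epochs, 1)) + noise_scale * np.random.rand(epochs, row.size - 1)
    labels = np.full((epochs, 1), row[-1])
    return np.append(features, labels, axis=1)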
# In the same format as your classifier
TP_perceptron = []
FP_perceptron = []
TP_KNNWeight = []
FP_KNNWeight = []
TP_KNN = []
FP_KNN = []
TP_NN = []
FP_NN = []
Xtr, ytr, Xte, yte = split(X, y)
for k in range(len(np.unique(yte, return_counts=False))):
    labels_k = perceptron.two_classes(ytr, k)
    weights, errors = perceptron.train(Xtr, labels_k, with_errors=True)
    pred = np.array([perceptron.predict(weights, x) for x in Xte])
    TP_perceptron.append(true_positive_perceptron(pred, yte, k))
    FP_perceptron.append(false_negative_perceptron(pred, yte, k))
# For each class we have a TP and an FP
S = [0.1, 0.2, 0.5, 1, 2, 5]
K = [2, 3, 4, 5, 10, 15]
z1 = predict(Xte, Xtr, ytr)
TP_NN.append(true_positive(z1, yte))
FP_NN.append(false_negative(z1, yte))
s = 0.1
k = 10
Z1 = []
Z2 = []
from perceptron import train, predict

# We can use any set of Xs and Ys to fit the perceptron.
# Perceptron training only converges if the classes are linearly separable
# (i.e., a straight line can split the 0-labeled points from the 1-labeled ones).

# Data given for the assignment
X = [[0.25, 0.353], [0.25, 0.471], [0.5, 0.353], [0.5, 0.647],
     [0.75, 0.705], [0.75, 0.882], [1, 0.705], [1, 1]]
Y = [0, 1, 0, 1, 0, 1, 0, 1]
W, b = train(X, Y, learning_rate=0.1)

# Check that the converged weights classify the training data correctly
print('Prediction is correct?', predict(X, W, b) == Y)
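# To see the separability caveat concretely: XOR labels are not linearly
# separable, so the update rule never settles. A self-contained sketch using
# the plain perceptron rule (not the imported train()):
import numpy as np

def perceptron_epochs_to_converge(X, Y, lr=0.1, max_epochs=1000):
    # Returns the number of epochs until an error-free pass, or None.
    X, Y = np.asarray(X, float), np.asarray(Y)
    w, b = np.zeros(X.shape[1]), 0.0
    for epoch in range(max_epochs):
        mistakes = 0
        for x, y in zip(X, Y):
            pred = 1 if x @ w + b >= 0 else 0
            if pred != y:
                w += lr * (y - pred) * x
                b += lr * (y - pred)
                mistakes += 1
        if mistakes == 0:
            return epoch
    return None

print(perceptron_epochs_to_converge([[0, 0], [0, 1], [1, 0], [1, 1]],
                                    [0, 1, 1, 0]))  # None: XOR never converges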
print("Loaded data.") X_train, Y_train = reviews_to_features(training_set) print("Featurized training data.") weights, losses = perceptron.train(X_train, Y_train, iterations=ITERATIONS, eta=ETA) print("Done training.") X_test, Y_test = reviews_to_features(dev_set) print("Featurized test data.") test_scores = perceptron.score(X_test.T, weights) test_sentiments = perceptron.predict(test_scores) (accuracy, recall, precision, f1, false_positive_rate, false_negative_rate) = perceptron.test(Y_test, test_sentiments) print( f"Predicted scores w/ threshold = 0.5, iterations = {ITERATIONS}, eta = {ETA}:" ) print(f" - accuracy : {accuracy}") print(f" - recall : {recall}") print(f" - precision : {precision}") print(f" - f1 : {f1}") print(f" - fpr : {false_positive_rate}") print(f" - fnr : {false_negative_rate}") if ENABLE_ROC: fpr = [] fnr = []
x.fillna(0, inplace=True)
y = np.squeeze(a4a.iloc[:, 0])
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

decay_grid = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
a4a_evaluation = dict()
weight_matrix = dict()
for i, decay_rate in enumerate(decay_grid):
    w = perceptron(x_train, y_train, decay_rate=decay_rate)
    weight_matrix[decay_rate] = w
    y_pred = predict(x_test, w)
    accuracy, misclassification_rate = measure_accuracy(y_pred, y_test)
    a4a_evaluation[decay_rate] = [round(accuracy, 2), round(misclassification_rate, 2)]

a4a_evaluation_df = pd.DataFrame(a4a_evaluation, index=['Accuracy', 'Misclassification Rate'])
labels = list(a4a_evaluation.keys())
acc = list(a4a_evaluation_df.loc['Accuracy'].values)
err = list(a4a_evaluation_df.loc['Misclassification Rate'].values)
x = np.arange(len(labels))
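# The labels/acc/err/x setup above looks like prep for a grouped bar chart;
# a minimal sketch of how it might be finished (matplotlib assumed, bar width
# chosen arbitrarily for the sketch):
import matplotlib.pyplot as plt

width = 0.35
fig, ax = plt.subplots()
ax.bar(x - width / 2, acc, width, label='Accuracy')
ax.bar(x + width / 2, err, width, label='Misclassification Rate')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_xlabel('Decay rate')
ax.legend()
plt.show()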
n = 10
d = 2
data0 = np.random.randn(n, d) + 3
data1 = np.random.randn(n, d) - 3
data = np.concatenate([data0, data1])
labels = np.concatenate([np.zeros(n), np.ones(n)])
# Two classes labeled 0 and 1
labels = perceptron.two_classes(labels, 0)
# Two classes labeled -1 and 1
weights, errors = perceptron.train(data, labels, with_errors=True)
print(weights)
for i in range(data.shape[0]):
    print(i, labels[i], perceptron.predict(weights, data[i]), data[i])
pyplot.scatter(data0[:, 0], data0[:, 1], marker="x", color="r", s=100)
pyplot.scatter(data1[:, 0], data1[:, 1], marker="*", color="b", s=100)
x0 = 0
y0 = -weights[2] / weights[1]
x1 = -weights[2] / weights[0]
y1 = 0
a = (y1 - y0) / (x1 - x0)
b = y0
pyplot.plot([-10, +10], [-10 * a + b, +10 * a + b], color="g")
pyplot.xlim(-6, 6)
pyplot.ylim(-6, 6)
pyplot.show()
pyplot.plot(errors)
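# Where the plotted line comes from (assuming weights = [w0, w1, w2] with w2
# the bias, as the indexing suggests): the decision boundary is
# w0*x + w1*y + w2 = 0, so its intercepts are y0 = -w2/w1 at x = 0 and
# x1 = -w2/w0 at y = 0; a and b above are just the slope and intercept of the
# line through those two points.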
    label_list = label_array.tolist()
    return size, label_list

# calculate accuracy
def accuracy(predictions, targets, size):
    correct_num = 0
    for i in range(size):
        if predictions[i] == targets[i]:
            correct_num += 1
    return correct_num / size

if __name__ == "__main__":
    train_size, train_label = read_label("train_label")
    train_size, rows, cols, train_data = read_data("train_data")
    test_size, test_label = read_label("test_label")
    test_size, rows, cols, test_data = read_data("test_data")
    # learning rate = 0.1
    print("Learning rate = 0.1")
    weights = perceptron.train_perceptron(train_data, train_label, train_size, rows, cols, 0.1)
    print("Training accuracy",
          accuracy(perceptron.predict(weights, train_size, train_data), train_label, train_size))
    print("Testing accuracy",
          accuracy(perceptron.predict(weights, test_size, test_data), test_label, test_size))
def main():
    # Prove accuracy by comparing to the perceptron and running over all examples.
    # For a row n in the dataset:
    # for row in dataset:
    v = np.array([[0, 0, 0, 0]]).T
    tau = 1
    dt = 0.05
    steps = 0
    v_hist = np.array([[0, 0, 0, 0]]).T
    # Repeat until the decision network makes a classification (output switch
    # units). A decision is reached when the activation of unit 3 or 4 is >= 1.1.
    while (v[3][0] <= 1.1) and (v[2][0] <= 1.1) and (steps < 10):
        steps += 1
        row = dataset[162]
        # Make sure the data isn't negative
        noise = np.random.rand(1, 60)
        noise = np.append(noise, 0)
        # Add noise to the row
        noisyRow = np.add(noise, row)
        # Let the perceptron and antiperceptron predict it (unthresholded)
        p_prediction = p.predict(noisyRow, p_weights)
        ap_prediction = p.predict(noisyRow, ap_weights)
        # Feed those values into the decision network
        activations = weights @ v
        print(activations)
        activations[0][0] = p_prediction + activations[0][0]
        activations[1][0] = ap_prediction + activations[1][0]
        activations = sigmoid(l, activations, bias)
        v_hist = np.concatenate((v_hist, v), axis=1)
        # Leaky integration with additive noise. Note the noise term must be
        # (4, 1)-shaped; reusing the 61-element input `noise` here would
        # broadcast v out to the wrong shape.
        dv = tau * ((-v + activations) * dt
                    + np.sqrt(dt) * np.random.normal(0, 1, (4, 1)))
        v = v + dv
        if (v[3][0] > 1) or (v[2][0] > 1):
            break
    plt.figure()
    plt.plot(v_hist[0, :], dashes=[2, 2])
    plt.plot(v_hist[1, :], dashes=[1, 1])
    plt.plot(v_hist[2, :], dashes=[2, 2])
    plt.plot(v_hist[3, :], dashes=[3, 3])
    plt.legend(["v1", "v2", "v3", "v4"], loc=0)
    plt.ylabel("activation")
    plt.xlabel("steps")
    plt.grid('on')
    plt.show()
    print(steps)
    print(steps * 60)
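# Aside: the dv update above is a single Euler-Maruyama step of a noisy leaky
# integrator, dv = tau * ((-v + a) dt + sqrt(dt) * xi), with xi drawn from a
# standard normal. In isolation (constant input a), it looks like:
import numpy as np

def euler_maruyama_step(v, a, tau=1.0, dt=0.05, rng=np.random):
    # Deterministic drift toward the input plus diffusion scaled by sqrt(dt).
    return v + tau * ((-v + a) * dt + np.sqrt(dt) * rng.normal(0, 1, v.shape))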
def file_main():
    data_set = [['ACD', 0.0231, 1.157, 0.919, 93.061, 0.0917],
                ['ACD', 0.0296, 1.1183, 0.9356, 80.9492, 0.0681],
                ['ACD', 0.0471, 1.3537, 1.0208, 108.7305, 0.091],
                ['ACD', 0.0165, 1.2621, 1.1879, 116.3081, 0.1154],
                ['ACD', 0.0236, 1.117, 0.8673, 77.9446, 0.066],
                ['ACD', 0.008, 1.413, 1.0474, 102.6556, 0.07],
                ['ACD', 0.0267, 1.4068, 1.1244, 107.5716, 0.0734],
                ['ACD', 0.0838, 1.1258, 1.0406, 100.2574, 0.0474],
                ['ACD', 0.0225, 1.2126, 0.9824, 98.885, 0.0928],
                ['ACD', 0.0639, 2.1101, 1.2162, 137.5727, 0.159],
                ['ACD', 0.0021, 0.8333, 0.7004, 68.5042, 0.0464],
                ['ACD', 0.0208, 1.5963, 1.0204, 142.5501, 0.1329],
                ['HM', 0.461, 2.1225, 1.5204, 133.2334, 0.0623],
                ['HM', 0.2118, 1.5373, 1.2326, 99.011, 0.0808],
                ['HM', 0.2308, 2.3465, 1.3419, 106.459, 0.0548],
                ['HM', 0.5372, 2.171, 1.8759, 135.6919, 0.0602],
                ['HM', 0.318, 2.1527, 1.1671, 130.0122, 0.0651],
                ['HM', 0.2434, 2.3092, 1.6817, 179.5259, 0.1192],
                ['HM', 0.4191, 1.5634, 0.8894, 117.2704, 0.0265],
                ['HM', 0.5952, 2.6538, 1.5957, 152.4041, 0.0752],
                ['HM', 0.3963, 2.0715, 1.2956, 124.8764, 0.094],
                ['HM', 0.1638, 1.8827, 1.0938, 105.0277, 0.0384],
                ['HM', 0.2752, 3.0803, 1.6789, 146.2936, 0.0803],
                ['HM', 0.4227, 1.6529, 0.8303, 84.3475, 0.0399]]

    # # decision tree training set
    # data_set = []
    # # Arthur Conan Doyle
    # data_set.append(process("lost_world.txt", "ACD"))
    # data_set.append(process("sherlock.txt", "ACD"))
    # data_set.append(process("study_in_scarlet.txt", "ACD"))
    # data_set.append(process("baskervilles.txt", "ACD"))
    # data_set.append(process("sign_four.txt", "ACD"))
    # data_set.append(process("return.txt", "ACD"))
    # data_set.append(process("memoirs.txt", "ACD"))
    # data_set.append(process("valley.txt", "ACD"))
    # data_set.append(process("tales_terror.txt", "ACD"))
    # data_set.append(process("white_company.txt", "ACD"))
    # data_set.append(process("last_bow.txt", "ACD"))
    # data_set.append(process("boer_war.txt", "ACD"))
    # # Herman Melville
    # data_set.append(process("moby_dick.txt", "HM"))
    # data_set.append(process("bartleby.txt", "HM"))
    # data_set.append(process("confidence_man.txt", "HM"))
    # data_set.append(process("pierre.txt", "HM"))
    # data_set.append(process("white_jacket.txt", "HM"))
    # data_set.append(process("typee.txt", "HM"))
    # data_set.append(process("battle_pieces.txt", "HM"))
    # data_set.append(process("redburn.txt", "HM"))
    # data_set.append(process("omoo.txt", "HM"))
    # data_set.append(process("israel_potter.txt", "HM"))
    # data_set.append(process("my_chimney.txt", "HM"))
    # data_set.append(process("mardi.txt", "HM"))

    # Decision tree test data
    shuffle(data_set)

    # Read test data and process it
    file = open("test_set_file.txt", "r")
    test_data = []
    get_test_data(file, test_data)
    processed_test_data = []
    for data_point in test_data:
        processed_test_data.append(process_string([data_point.author], data_point.data))

    correct_count = 0
    total_count = 0

    # Decision tree classifier usage
    decision_tree = dt.DecisionTree(data_set, ["HM", "ACD"], 4)
    processed_test_data.extend(data_set)
    shuffle(processed_test_data)
    for data_point in processed_test_data:
        total_count += 1
        node = decision_tree.tree
        while node.FINAL_LABEL == "":
            if data_point[node.att_index] <= node.threshold:
                node = node.left
            elif data_point[node.att_index] > node.threshold:
                node = node.right
        if node.FINAL_LABEL == data_point[0]:
            correct_count += 1

    # Perceptron classifier
    processed_test_data.extend(copy.deepcopy(data_set))
    for data_point in data_set:
        if "ACD" in data_point[0]:
            data_point[0] = 1
        elif "HM" in data_point[0]:
            data_point[0] = 0
    for data_point in processed_test_data:
        # Rows shared with data_set were already converted above (extend
        # copies references), so only convert labels that are still strings.
        if isinstance(data_point[0], str):
            if "ACD" in data_point[0]:
                data_point[0] = 1
            elif "HM" in data_point[0]:
                data_point[0] = 0
    weights = pt.train_perceptron(processed_test_data, 0.01, 20000)
    for data_point in processed_test_data:
        total_count += 1
        prediction = pt.predict(data_point, weights)
        if data_point[0] == int(prediction):
            correct_count += 1
    print("Perceptron weights: " + str(weights))
    print("Total correct: " + str((correct_count / total_count) * 100))
    print("done")
def runTrials(trial, trainingDigits, testingDigits, testingLabelsData,
              flattenedTestDigits, percent):
    testIndices = []
    guesses = []
    # Start timing Bayes
    bayesStart = time.process_time()
    learningDigits = classifier.pickData(trainingDigits, percent)
    formattedDigits = classifier.getFormattedTraining(learningDigits)
    condProbCounters = classifier.getCondProbs(learningDigits)
    for i in range(0, len(testingDigits)):
        testIndices.append(i)
        guesses.append(classifier.naiveBayes(i, testingDigits, formattedDigits, percent, condProbCounters))
    # End timing Bayes
    bayesEnd = time.process_time()
    bayesTrainingTime = bayesEnd - bayesStart
    numCorrect = 0
    for tup in guesses:
        digitIndex = tup[0]
        guess = tup[1]
        if int(guess) == int(testingLabelsData[digitIndex]):
            numCorrect += 1
    accBayes = (numCorrect / len(testingDigits)) * 100
    print("Accuracy for Naive Bayes classifier trial: " + str(trial) + " is: " + str(accBayes))
    # Begin perceptron training and prediction
    numPixels = classifier.digitRowLen * classifier.digitColLen
    WVectors = perceptron.initializeWeightVecs(numPixels)
    # Start training time for perceptron
    startTime = time.process_time()
    for iteration in range(0, maxIter):
        perceptron.train(WVectors, trainingDigits, percent, iteration)
    # End training time
    endTime = time.process_time()
    perceptronTrainingTime = endTime - startTime
    numCorrect = 0
    for k in range(0, len(flattenedTestDigits)):
        testDigit = flattenedTestDigits[k]
        guess = perceptron.predict(WVectors, testDigit)
        if guess == int(testingLabelsData[k]):
            numCorrect += 1
    acc = (numCorrect / len(testingDigits)) * 100
    print("Accuracy for Perceptron trial: " + str(trial) + " is: " + str(acc))
    percentFolderPath = "{0:d} percent/".format(int(percent * 100))
    with open("trainingData/" + percentFolderPath + "output{0:d}.txt".format(trial), "w+") as f:
        with redirect_stdout(f):
            print("%s %s %s %s" % ("Bayes Acc: ", str(accBayes), "Bayes training time: ", str(bayesTrainingTime)))
            print("%s %s %s %s" % ("Percep Acc: ", str(acc), "Percep training time: ", str(perceptronTrainingTime)))
# Instantiate the perceptron
perceptron = perceptron.Perceptron()
perceptron.input_length = 4  # set input length

# Prepare the dataset
iris_dataset = generate_iris_dataset()
ts_input_iris = np.array([specs[0] for specs in iris_dataset])   # input data
ts_output_iris = np.array([specs[1] for specs in iris_dataset])  # expected output data

# Train
perceptron.train(ts_input_iris, ts_output_iris)

# Test the whole dataset
def test_all():
    for ts_input, expected in zip(ts_input_iris, ts_output_iris):
        output = perceptron.predict(ts_input)
        result = 'OK' if expected == output else 'FAIL'
        iris_type_name = "Iris-setosa" if output == 1 else "Iris-versicolor"
        print(f'Input: {ts_input} Output: {output} = {iris_type_name} [{result}]')

# You can also test a single set of iris measurements
p_input = [5.9, 3.0, 4.2, 1.0]
if perceptron.predict(p_input) == 1:
    print("It is an Iris setosa")
else:
    print("It is an Iris versicolor")
def main():
    data_set = [['ACD', 0.0231, 1.157, 0.919, 93.061, 0.0917],
                ['ACD', 0.0296, 1.1183, 0.9356, 80.9492, 0.0681],
                ['ACD', 0.0471, 1.3537, 1.0208, 108.7305, 0.091],
                ['ACD', 0.0165, 1.2621, 1.1879, 116.3081, 0.1154],
                ['ACD', 0.0236, 1.117, 0.8673, 77.9446, 0.066],
                ['ACD', 0.008, 1.413, 1.0474, 102.6556, 0.07],
                ['ACD', 0.0267, 1.4068, 1.1244, 107.5716, 0.0734],
                ['ACD', 0.0838, 1.1258, 1.0406, 100.2574, 0.0474],
                ['ACD', 0.0225, 1.2126, 0.9824, 98.885, 0.0928],
                ['ACD', 0.0639, 2.1101, 1.2162, 137.5727, 0.159],
                ['ACD', 0.0021, 0.8333, 0.7004, 68.5042, 0.0464],
                ['ACD', 0.0208, 1.5963, 1.0204, 142.5501, 0.1329],
                ['HM', 0.461, 2.1225, 1.5204, 133.2334, 0.0623],
                ['HM', 0.2118, 1.5373, 1.2326, 99.011, 0.0808],
                ['HM', 0.2308, 2.3465, 1.3419, 106.459, 0.0548],
                ['HM', 0.5372, 2.171, 1.8759, 135.6919, 0.0602],
                ['HM', 0.318, 2.1527, 1.1671, 130.0122, 0.0651],
                ['HM', 0.2434, 2.3092, 1.6817, 179.5259, 0.1192],
                ['HM', 0.4191, 1.5634, 0.8894, 117.2704, 0.0265],
                ['HM', 0.5952, 2.6538, 1.5957, 152.4041, 0.0752],
                ['HM', 0.3963, 2.0715, 1.2956, 124.8764, 0.094],
                ['HM', 0.1638, 1.8827, 1.0938, 105.0277, 0.0384],
                ['HM', 0.2752, 3.0803, 1.6789, 146.2936, 0.0803],
                ['HM', 0.4227, 1.6529, 0.8303, 84.3475, 0.0399]]
    if len(sys.argv) > 1:
        if sys.argv[1] != "train" and sys.argv[1] != "predict":
            print("Unknown argument, please enter 'predict' or 'train'")
            sys.exit(1)
        elif sys.argv[1] == "train":
            # Train your model
            model = input("Which model would you like to train? Perceptron(p) or Decision Tree(d): ")
            if model != "p" and model != "d":
                print("Sorry! Wrong argument")
            elif model == "p":
                perceptron_data = copy.deepcopy(data_set)
                for data_point in perceptron_data:
                    if "ACD" in data_point[0]:
                        data_point[0] = 1
                    elif "HM" in data_point[0]:
                        data_point[0] = 0
                shuffle(perceptron_data)
                weights = pt.train_perceptron(perceptron_data, 0.01, 20000)
                predict = input("A perceptron has been trained. Would you like to make a prediction? (y/n) ")
                if predict == "y":
                    filename = input("Please enter the name of the file containing text for author identification: ")
                    data_value = fp.process(filename, "NA")
                    prediction = pt.predict(data_value, weights)
                    if int(prediction) == 1:
                        print("Author is Arthur Conan Doyle.")
                    elif int(prediction) == 0:
                        print("Author is Herman Melville.")
            elif model == "d":
                max_depth = int(input("Please enter the maximum depth of the decision tree: "))
                entropy_cutoff = float(input("Please enter the entropy cutoff of the decision tree (ideal is 0.0): "))
                print("Training a decision tree on training data...")
                # shuffle() works in place and returns None, so shuffle first
                # and then pass the list itself.
                shuffle(data_set)
                tree = dt.DecisionTree(data_set, ["ACD", "HM"], max_depth, entropy_cutoff)
                predict = input("The decision tree has been trained. Would you like to make a prediction? (y/n) ")
                if predict == "y":
                    filename = input("Please enter the name of the file containing text for author identification: ")
                    data_value = fp.process(filename, "NA")
                    node = tree.tree  # traverse from the root node, as in file_main
                    while node.FINAL_LABEL == "":
                        if data_value[node.att_index] <= node.threshold:
                            node = node.left
                        elif data_value[node.att_index] > node.threshold:
                            node = node.right
                    if node.FINAL_LABEL == "ACD":
                        print("The author is Arthur Conan Doyle")
                    else:
                        print("The author is Herman Melville")
        elif sys.argv[1] == "predict":
            filename = sys.argv[2]
            print("Predicting using an existing model: ")
            model_file = open("model_perceptron.txt", "r")
            line = model_file.readline().split(",")
            weights = []
            for weight in line:
                weights.append(float(weight))
            data_value = fp.process(filename, "NA")
            prediction = pt.predict(data_value, weights)
            if int(prediction) == 1:
                print("Author is Arthur Conan Doyle.")
            elif int(prediction) == 0:
                print("Author is Herman Melville.")
    else:
        print("Please enter argument 'train' or 'predict'.")
        sys.exit(1)
def accuracy(truth, output):
    n = truth.shape[0]
    accur = 0.
    for i in range(n):
        if truth[i] == output[i]:
            accur += 1
    return accur / n

data, labels = usps.load_train()
data_test, labels_test = usps.load_test()
for k in range(10):
    labels_k = perceptron.two_classes(labels, k)
    weights, errors = perceptron.train(data, labels_k, with_errors=True)
    print(k)
    output = np.array([perceptron.predict(weights, x) for x in data])
    print("  Score (train)", accuracy(labels_k, output))
    output = np.array([perceptron.predict(weights, x) for x in data_test])
    print("  Score (test)", accuracy(perceptron.two_classes(labels_test, k), output))
    pyplot.clf()
    pyplot.imshow(weights[:-1].reshape((16, 16)), cmap=pyplot.gray())
    pyplot.colorbar()
    pyplot.savefig("usps_" + str(k) + "-weights.png")
    pyplot.plot(errors)
    pyplot.savefig("usps_" + str(k) + "-errors.png")
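# The loop above trains ten one-vs-rest binary perceptrons. To get a single
# multiclass prediction, a common follow-up (sketched here, assuming each
# weight vector carries its bias as the last component, as the reshape of
# weights[:-1] suggests) is to keep all ten vectors and take the argmax of
# the raw scores:
import numpy as np

def predict_multiclass(all_weights, x):
    # all_weights: list of 10 weight vectors, one per digit class.
    scores = [w[:-1] @ x + w[-1] for w in all_weights]
    return int(np.argmax(scores))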