import perceptron
from numpy import array


def get_eout(f, g, num_points=1000, num_iters=1000):
    """Monte Carlo estimate of the out-of-sample error of hypothesis g
    against target f, averaged over num_iters batches of num_points."""
    eout_sum = 0.0
    for j in range(num_iters):
        misclassified = 0.0
        for i in range(num_points):
            point = perceptron.random_point()
            point = array([1, point[0], point[1]])  # prepend the bias coordinate
            # Labels are +/-1, so (f(x) - g(x))^2 / 4 is 1 on a
            # disagreement and 0 otherwise.
            misclassified += (perceptron.classify(f, point)
                              - perceptron.classify(g, point))**2 / 4
        eout_sum += float(misclassified) / float(num_points)
    return eout_sum / float(num_iters)
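# A minimal usage sketch for get_eout. The weight vectors f and g below are
# hypothetical placeholders; in practice f would be the hidden target and g
# the hypothesis returned by the perceptron learning algorithm.
def _demo_get_eout():
    f = array([0.5, -1.0, 1.0])   # hypothetical target weights
    g = array([0.4, -0.9, 1.1])   # hypothetical learned weights
    print(get_eout(f, g, num_points=500, num_iters=100))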
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import perceptron


def main():
    # --- Plain perceptron ---
    y, x = perceptron.read_svm_file('data')
    x = perceptron.scale(x)
    w = [0, 0, 0]
    alpha = 0.005
    w, iters = perceptron.perc_alg(x, y, w, alpha)

    # Exclusively used for debugging thus far.
    yHat = perceptron.classify(x, w)
    print(yHat - y)

    x = np.asarray(x)
    # 0 = w0 + w1*x1 + w2*x2  =>  x2 = -w0/w2 - (w1/w2)*x1
    line = -w[0]/w[2] - x[:, 0]*w[1]/w[2]
    # Slice ends are exclusive: 0:15 covers the first 15 points (French),
    # 15:30 the last 15 (English).
    plt.plot(x[0:15, 0], x[0:15, 1], 'y.',
             x[15:30, 0], x[15:30, 1], 'r.',
             x[:, 0], line, 'g-')
    red_patch = mpatches.Patch(color='red', label='English')
    yellow_patch = mpatches.Patch(color='yellow', label='French')
    plt.legend(handles=[red_patch, yellow_patch], loc=4)
    plt.ylabel('Total number of letters')
    plt.xlabel('Number of A')
    plt.title('Perceptron')
    plt.show()

    # --- Perceptron trained via logistic regression ---
    y, x = perceptron.read_svm_file('data')
    x = perceptron.scale(x)
    w = [0, 0, 0]
    alpha = 0.5
    epsilon = 1e-3
    w, iters = perceptron.perc_alg_reg(x, y, w, alpha, epsilon)

    # Exclusively used for debugging thus far.
    yHat = perceptron.classify(x, w)
    print(yHat - y)

    x = np.asarray(x)
    # 0 = w0 + w1*x1 + w2*x2  =>  x2 = -w0/w2 - (w1/w2)*x1
    line = -w[0]/w[2] - x[:, 0]*w[1]/w[2]
    plt.plot(x[0:15, 0], x[0:15, 1], 'y.',
             x[15:30, 0], x[15:30, 1], 'r.',
             x[:, 0], line, 'g-')
    red_patch = mpatches.Patch(color='red', label='English')
    yellow_patch = mpatches.Patch(color='yellow', label='French')
    plt.legend(handles=[red_patch, yellow_patch], loc=4)
    plt.ylabel('Total number of letters')
    plt.xlabel('Number of A')
    plt.title('Perceptron using Logistic Regression')
    plt.show()
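# perc_alg lives in the local perceptron module; a minimal sketch of the
# classic perceptron update it is assumed to perform (weights nudged by
# alpha * y * x on every misclassified point until no mistakes remain)
# could look like this. The name and the bias handling are assumptions.
def perc_alg_sketch(x, y, w, alpha, max_iters=10000):
    """Hypothetical stand-in for perceptron.perc_alg: assumes +/-1 labels
    and prepends a bias column so that w = [w0, w1, w2]."""
    x = np.insert(np.asarray(x, dtype=float), 0, 1, axis=1)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    for iters in range(max_iters):
        mistakes = 0
        for xi, yi in zip(x, y):
            if yi * np.dot(w, xi) <= 0:    # misclassified point
                w = w + alpha * yi * xi    # perceptron update rule
                mistakes += 1
        if mistakes == 0:                  # converged: data separated
            break
    return w, iters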
import perceptron
from numpy import array


def compute_eout(g, size=1000, iters=1000):
    """Estimate out-of-sample error for the nonlinear target, with 10%
    label noise, averaged over iters freshly generated data sets.
    create_training_set, nonlinear_classification_fn, transform_X and
    flip_ys are defined elsewhere in this module."""
    eout_sum = 0.0
    for j in range(iters):
        X = create_training_set(size)[0]
        Y = array([nonlinear_classification_fn(point[1], point[2])
                   for point in X])
        X = transform_X(X)
        flip_ys(Y, percent=0.1)  # inject 10% label noise
        misclassified = 0.0
        for i in range(len(X)):
            # Labels are +/-1: (y - g(x))^2 / 4 is the 0/1 error indicator.
            misclassified += (Y[i] - perceptron.classify(g, X[i]))**2 / 4
        eout_sum += float(misclassified) / float(size)
    return float(eout_sum) / float(iters)
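# A minimal sketch of the behavior compute_eout assumes from flip_ys
# (negating a given fraction of +/-1 labels in place); the real helper is
# defined elsewhere and this stand-in is an assumption.
import random

def flip_ys_sketch(Y, percent=0.1):
    """Hypothetical stand-in for flip_ys: flip `percent` of the labels."""
    n_flip = int(len(Y) * percent)
    for i in random.sample(range(len(Y)), n_flip):
        Y[i] = -Y[i]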
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

import perceptron


def plot_boundary(points, w):
    print("Calculating boundary...")

    # Generate a grid of points over the data
    x_mesh = mesh(points[:, 1])
    y_mesh = mesh(points[:, 2])
    grid_x, grid_y = np.meshgrid(x_mesh, y_mesh)
    grid = np.c_[grid_x.ravel(), grid_y.ravel()]

    # Classify points in the grid
    classifications = perceptron.classify(prepend_bias(grid),
                                          w).reshape(grid_x.shape)

    # Trace the decision boundary
    BLUE_AND_GREEN = ListedColormap(['#BBBBFF', '#BBFFBB'])
    plt.contourf(grid_x, grid_y, classifications, cmap=BLUE_AND_GREEN)
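# mesh and prepend_bias are helpers defined elsewhere in this module. A
# minimal sketch of the behavior plot_boundary assumes (an evenly spaced
# 1-D grid over a column's range, and a column of ones prepended for the
# bias term) might look like this; both names are hypothetical stand-ins.
def mesh_sketch(values, steps=200):
    """Hypothetical stand-in for mesh(): evenly spaced grid over values."""
    return np.linspace(values.min(), values.max(), steps)

def prepend_bias_sketch(X):
    """Hypothetical stand-in for prepend_bias(): add a leading ones column."""
    return np.insert(X, 0, 1, axis=1)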
import reader
import perceptron as p  # assumption: the perceptron module is imported as p


def main(args):
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.dataset_file)
    if not args.extra:
        predicted_labels = p.classify(train_set, train_labels, dev_set,
                                      args.lrate, args.max_iter)
    else:
        predicted_labels = p.classifyEC(train_set, train_labels, dev_set,
                                        args.lrate, args.max_iter)
    accuracy, f1, precision, recall = compute_accuracies(
        predicted_labels, dev_set, dev_labels)
    print("Accuracy:", accuracy)
    print("F1-Score:", f1)
    print("Precision:", precision)
    print("Recall:", recall)
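# compute_accuracies is defined elsewhere; a minimal sketch of the metrics
# it is assumed to return (accuracy, F1, precision, recall for binary 0/1
# labels) could be the following. The name and signature are assumptions.
def compute_accuracies_sketch(predicted_labels, dev_set, dev_labels):
    """Hypothetical stand-in for compute_accuracies."""
    pairs = list(zip(predicted_labels, dev_labels))
    tp = sum(1 for p_, y in pairs if p_ == 1 and y == 1)
    fp = sum(1 for p_, y in pairs if p_ == 1 and y == 0)
    fn = sum(1 for p_, y in pairs if p_ == 0 and y == 1)
    accuracy = sum(1 for p_, y in pairs if p_ == y) / len(pairs)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return accuracy, f1, precision, recall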
import json
import sys


def main():
    # generate_stop_terms, preprocess, generate_features and classify are
    # helpers defined elsewhere in this module.
    tweet = sys.argv[1]
    preprocess_stop_terms_filename = "config_files/preprocess_stop_terms.txt"
    stop_terms = generate_stop_terms(preprocess_stop_terms_filename)
    preprocessed_tweet = preprocess(tweet, stop_terms)

    # Load the trained weights; the JSON is stored on a single line.
    with open("data_files/perceptron_weights.json") as f:
        vocab = json.loads(f.readlines()[0])
    # self.vocab = json.load("data_files/perceptron_weights.json")

    weights = []
    for (term, weight) in vocab.items():
        weights.append(weight)

    features = generate_features(" ".join(preprocessed_tweet), vocab)
    classification = classify(weights, " ".join(preprocessed_tweet), features)
    print(classification)
import perceptron


def compute_ein(X, Y, g):
    """In-sample error: fraction of training points misclassified by g."""
    total = len(X)
    # With +/-1 labels, (y - g(x))^2 is 4 on a disagreement and 0 otherwise,
    # so dividing by 4 yields the 0/1 misclassification indicator.
    misclassified = sum(
        ((Y[i] - perceptron.classify(g, X[i]))**2) / 4 for i in range(len(X)))
    return float(misclassified) / float(total)
#################################
# Testing Perceptron as a whole #
#################################
import perceptron as p
import random

import numpy as np

random.seed(0)  # fixed seed for reproducible test data; value is arbitrary
NUM_TESTS = 10000

# Append a bias term to the data: add a column of ones to each sample.
abttd = lambda x: np.hstack([x, np.ones((len(x), 1))])
testBatch = lambda p, s: [p.classify(i) for i in s]

# We may need to implement our own version of testBatch to make sure we don't
# depend on student code, if we do in fact make them write their own testBatch.
# We are currently assuming that we have a train method (not a trainBatch) and
# a testBatch method that only takes in samples and returns a list of predicted
# labels, so if this is not the case then THESE NEED TO BE EDITED.

# TEST A1 #
# Binary classification where a bias isn't needed.
# The data is a collection of (x, y) coordinates, linearly separable
# (without a bias term) by the line x = y.
# I get 100% accuracy both with and without a bias term.
trainData1 = [[random.randint(1, 10), random.randint(1, 10)]
              for _ in range(NUM_TESTS)]
trainLabels1 = [1 if x[0] >= x[1] else 0 for x in trainData1]
trainData1 = np.array(trainData1)
trainDataWithBias1 = abttd(trainData1)
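# A hedged sketch of how TEST A1 might be driven, under the assumptions
# stated above (a per-sample train method plus the testBatch helper defined
# earlier). The p.Perceptron class name and its constructor are hypothetical
# and would need to match the actual student-facing API.
def run_test_a1():
    model = p.Perceptron()                       # hypothetical constructor
    for sample, label in zip(trainDataWithBias1, trainLabels1):
        model.train(sample, label)               # assumed per-sample train()
    predictions = testBatch(model, trainDataWithBias1)
    accuracy = np.mean(np.array(predictions) == np.array(trainLabels1))
    print("TEST A1 accuracy:", accuracy)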