def get_eout(f, g, num_points=1000, num_iters=1000):
  """Estimate the out-of-sample error of hypothesis g against target f
  by averaging the disagreement rate over freshly sampled points."""
  eout_sum = 0.0
  for _ in range(num_iters):
    misclassified = 0.0
    for _ in range(num_points):
      point = perceptron.random_point()
      point = array([1, point[0], point[1]])  # prepend the bias feature
      # Labels are +/-1, so (f(x) - g(x))**2 / 4 is 1 on disagreement, else 0.
      misclassified += (
          perceptron.classify(f, point) - perceptron.classify(g, point))**2 / 4
    eout_sum += misclassified / num_points
  return eout_sum / num_iters
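
# get_eout() above relies on two helpers from the `perceptron` module that are
# not shown here. A minimal sketch of what they are assumed to do (these bodies
# are assumptions, not the original module code); labels are in {-1, +1}, which
# is why (f(x) - g(x))**2 / 4 above is exactly the 0/1 disagreement:
import random
import numpy as np

def random_point(low=-1.0, high=1.0):
  # Draw one point uniformly from the square [low, high] x [low, high].
  return [random.uniform(low, high), random.uniform(low, high)]

def classify(w, x):
  # Sign of the inner product <w, x>; x carries a leading 1 for the bias,
  # as built in get_eout above.
  return 1 if np.dot(w, x) >= 0 else -1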
def main():
    # --- Standard perceptron ---
    y, x = perceptron.read_svm_file('data')
    x = perceptron.scale(x)
    w = [0, 0, 0]
    alpha = 0.005
    w, iters = perceptron.perc_alg(x, y, w, alpha)

    # Used exclusively for debugging so far.
    yHat = perceptron.classify(x, w)
    print(yHat - y)

    x = np.asarray(x)
    # 0 = w0 + w1*x1 + w2*x2  =>  x2 = -w0/w2 - (w1/w2)*x1
    line = -w[0]/w[2] - x[:, 0]*w[1]/w[2]

    # First 15 rows are the French samples, the remaining rows the English ones.
    plt.plot(x[:15, 0], x[:15, 1], 'y.',
             x[15:, 0], x[15:, 1], 'r.',
             x[:, 0], line, 'g-')

    red_patch = mpatches.Patch(color='red', label='English')
    yellow_patch = mpatches.Patch(color='yellow', label='French')
    plt.legend(handles=[red_patch, yellow_patch], loc='lower right')

    plt.ylabel('Total number of letters')
    plt.xlabel('Number of A')
    plt.title('Perceptron')

    plt.show()

    # --- Perceptron trained with logistic regression ---
    y, x = perceptron.read_svm_file('data')
    x = perceptron.scale(x)
    w = [0, 0, 0]
    alpha = 0.5
    epsilon = 1e-3
    w, iters = perceptron.perc_alg_reg(x, y, w, alpha, epsilon)

    # Used exclusively for debugging so far.
    yHat = perceptron.classify(x, w)
    print(yHat - y)

    x = np.asarray(x)
    # 0 = w0 + w1*x1 + w2*x2  =>  x2 = -w0/w2 - (w1/w2)*x1
    line = -w[0]/w[2] - x[:, 0]*w[1]/w[2]

    plt.plot(x[:15, 0], x[:15, 1], 'y.',
             x[15:, 0], x[15:, 1], 'r.',
             x[:, 0], line, 'g-')

    red_patch = mpatches.Patch(color='red', label='English')
    yellow_patch = mpatches.Patch(color='yellow', label='French')
    plt.legend(handles=[red_patch, yellow_patch], loc='lower right')

    plt.ylabel('Total number of letters')
    plt.xlabel('Number of A')
    plt.title('Perceptron using Logistic Regression')

    plt.show()
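
# main() assumes perceptron.perc_alg(x, y, w, alpha) implements the standard
# perceptron learning rule. A hedged sketch under that assumption (labels in
# {-1, +1}, a constant bias feature prepended internally so that w = [w0, w1, w2]
# matches the boundary formula used above; this is not the original module code):
import numpy as np

def perc_alg(x, y, w, alpha, max_iters=1000):
    xb = np.hstack([np.ones((len(x), 1)), np.asarray(x, dtype=float)])
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    iters = 0
    for iters in range(1, max_iters + 1):
        errors = 0
        for xi, yi in zip(xb, y):
            if yi * np.dot(w, xi) <= 0:   # misclassified (or on the boundary)
                w = w + alpha * yi * xi    # nudge the separator toward xi
                errors += 1
        if errors == 0:                    # converged: every point classified
            break
    return w, iters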
def compute_eout(g, size=1000, iters=1000):
  """Estimate the out-of-sample error of g on fresh, noisy nonlinear data."""
  eout_sum = 0.0
  for _ in range(iters):
    X = create_training_set(size)[0]
    Y = array([nonlinear_classification_fn(point[1], point[2]) for point in X])
    X = transform_X(X)
    flip_ys(Y, percent=0.1)  # inject 10% label noise
    misclassified = 0.0
    for i in range(len(X)):
      # Labels are +/-1, so the squared difference over 4 is the 0/1 error.
      misclassified += (Y[i] - perceptron.classify(g, X[i]))**2 / 4
    eout_sum += misclassified / size
  return eout_sum / iters
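
# flip_ys(Y, percent=0.1) above is assumed to inject label noise by negating a
# random fraction of the labels in place. A minimal sketch of that behaviour
# (the name and signature come from the call above; the body is an assumption):
import random

def flip_ys(Y, percent=0.1):
  # Pick percent*len(Y) distinct indices and flip their {-1, +1} labels.
  for i in random.sample(range(len(Y)), int(percent * len(Y))):
    Y[i] = -Y[i]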
# Example 4
def plot_boundary(points, w):
    print("Calculating boundary...")
    # Generate a grid of points over the data
    x_mesh = mesh(points[:, 1])
    y_mesh = mesh(points[:, 2])
    grid_x, grid_y = np.meshgrid(x_mesh, y_mesh)
    grid = np.c_[grid_x.ravel(), grid_y.ravel()]
    # Classify points in the grid
    classifications = perceptron.classify(prepend_bias(grid),
                                          w).reshape(grid_x.shape)
    # Trace the decision boundary
    BLUE_AND_GREEN = ListedColormap(['#BBBBFF', '#BBFFBB'])
    plt.contourf(grid_x, grid_y, classifications, cmap=BLUE_AND_GREEN)
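
# plot_boundary() uses two small helpers that are defined elsewhere. A plausible
# sketch, assuming mesh() builds an evenly spaced 1-D grid over one coordinate
# and prepend_bias() adds the constant-1 column that perceptron.classify expects
# (both bodies are assumptions, not the original code):
import numpy as np

def mesh(values, resolution=200, margin=0.1):
    # Evenly spaced samples spanning `values`, padded a little so the contour
    # extends past the outermost data points.
    return np.linspace(values.min() - margin, values.max() + margin, resolution)

def prepend_bias(X):
    # Insert a column of ones in front of X, matching a weight vector whose
    # first component is the bias term.
    return np.insert(X, 0, 1, axis=1)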
# Example 5
def main(args):
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.dataset_file)
    if not args.extra:
        predicted_labels = p.classify(train_set, train_labels, dev_set,
                                      args.lrate, args.max_iter)
    else:
        predicted_labels = p.classifyEC(train_set, train_labels, dev_set,
                                        args.lrate, args.max_iter)
    accuracy, f1, precision, recall = compute_accuracies(
        predicted_labels, dev_set, dev_labels)
    print("Accuracy:", accuracy)
    print("F1-Score:", f1)
    print("Precision:", precision)
    print("Recall:", recall)
# Example 6
def main():
    tweet = sys.argv[1]
    preprocess_stop_terms_filename = "config_files/preprocess_stop_terms.txt"
    stop_terms = generate_stop_terms(preprocess_stop_terms_filename)
    preprocessed_tweet = preprocess(tweet, stop_terms)
    # The weights file maps each vocabulary term to its learned weight.
    with open("data_files/perceptron_weights.json") as f:
        vocab = json.load(f)
    weights = [weight for term, weight in vocab.items()]
    features = generate_features(" ".join(preprocessed_tweet), vocab)
    classification = classify(weights, " ".join(preprocessed_tweet), features)
    print(classification)
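
# generate_features() and classify() above are project helpers that are not
# shown. A plausible sketch, assuming a bag-of-words count vector aligned with
# the vocabulary's iteration order and a thresholded dot product (the names and
# call signatures come from above; the bodies and the 0/1 output are assumptions):
def generate_features(text, vocab):
    tokens = text.split()
    return [tokens.count(term) for term in vocab]  # one count per vocab term

def classify(weights, text, features):
    score = sum(w * f for w, f in zip(weights, features))
    return 1 if score >= 0 else 0  # the real label names live elsewhere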
def compute_ein(X, Y, g):
  """Fraction of training points on which hypothesis g disagrees with Y."""
  total = len(X)
  misclassified = sum(
      ((Y[i] - perceptron.classify(g, X[i]))**2) / 4
      for i in range(total))
  return misclassified / total
#################################
# Testing Perceptron as a whole #
#################################

import perceptron as p
import random
import numpy as np

random.seed(0)  # fixed seed (arbitrary value) so the test data is reproducible
NUM_TESTS = 10000
# abttd: "add bias term to data" -- append a column of ones to each sample.
abttd = lambda x: np.hstack([x, np.ones((len(x), 1))])
# Run a trained perceptron instance over a batch of samples.
testBatch = lambda clf, samples: [clf.classify(i) for i in samples]
# We may need to implement our own version of testBatch to make sure we don't
# depend on student code, if we do in fact make them write their own testBatch.

# Currently assuming that we have a train method (not a trainBatch) and a
# testBatch method that only takes in samples and returns a list of predicted
# labels; if this is not the case then THESE NEED TO BE EDITED.

# TEST A1 #

# Binary classification where a bias isn't needed.
# The data is a collection of (x, y) coordinates and is supposed to be linearly
# separable (without a bias term) by the line x = y.
# I get 100% accuracy both with and without a bias term.

trainData1 = [[random.randint(1, 10),
               random.randint(1, 10)] for _ in range(NUM_TESTS)]
trainLabels1 = [1 if x[0] >= x[1] else 0 for x in trainData1]
trainData1 = np.array(trainData1)
trainDataWithBias1 = abttd(trainData1)
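
# A hedged usage sketch of how the data above could be pushed through the
# interface described in the comments (a trainable perceptron with a per-sample
# classify() used via testBatch); the class name, constructor, and train()
# signature are assumptions, not a confirmed student API:
# clf = p.Perceptron(num_features=3)   # hypothetical constructor
# for sample, label in zip(trainDataWithBias1, trainLabels1):
#     clf.train(sample, label)
# predictions = testBatch(clf, trainDataWithBias1)
# accuracy = sum(int(pred == lab)
#                for pred, lab in zip(predictions, trainLabels1)) / NUM_TESTS
# print("TEST A1 accuracy:", accuracy)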