def example(algorithm):
    """Train one classifier on the tweet training set and print its accuracy.

    The dictionary, labels and feature vectors are loaded from
    'train-tweet.txt' / 'train-answer.txt', the requested trainer is run, the
    training set is classified with the resulting parameters, and a one-line
    summary of the number of matching labels is printed.

    Args:
        algorithm: Name of the trainer to run; one of 'averager',
            'perceptron' or 'passive'.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    # Validate before touching any data file: an unknown name used to fail
    # only after all the loading, with an unbound-variable error.
    if algorithm not in ('averager', 'perceptron', 'passive'):
        raise ValueError(
            "unknown algorithm %r; expected 'averager', 'perceptron' or "
            "'passive'" % (algorithm,))

    dictionary = p1.extract_dictionary('train-tweet.txt')
    labels = p1.read_vector_file('train-answer.txt')
    feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

    if algorithm == 'averager':
        average_theta, average_theta_0 = p1.averager(feature_matrix, labels)
    elif algorithm == 'perceptron':
        average_theta, average_theta_0 = p1.perceptron_algorithm(
            feature_matrix, labels)
    else:  # 'passive' -- the only remaining validated name
        average_theta, average_theta_0 = p1.passive_aggressive(
            feature_matrix, labels)

    label_output = p1.perceptron_classify(
        feature_matrix, average_theta_0, average_theta)

    # Count predictions that agree with the reference labels.  Indexing
    # ``labels`` (rather than zipping) keeps a length mismatch loud.
    total = len(label_output)
    correct = 0
    for i, predicted in enumerate(label_output):
        if predicted == labels[i]:
            correct += 1

    # No predictions at all is reported as 0% instead of dividing by zero.
    percentage_correct = 100.0 * correct / total if total else 0.0
    print(algorithm + " gets " + str(percentage_correct) + "% correct ("
          + str(correct) + " out of " + str(total) + ").")
# Earlier experiment, left disabled:
# labels = [1, 1, -1, -1]
# p1.plot_2d_examples(feature_matrix, labels, 0, [0.25, 0.6])

# "Loose" toy set: seven 2-D points, the first four labelled +1 and the
# remaining three labelled -1.
loose_points = np.array([
    [-3, 4],
    [-2, 3],
    [2, 4],
    [4, 2],
    [-3, -2],
    [0, -2],
    [3, -3],
])
loose_labels = np.array([1] * 4 + [-1] * 3)

# Train each classifier on the same points and plot its result.  Every
# trainer's return value is unpacked as (theta, theta_0); the plot helper is
# called with the offset before the weight vector.
average_theta, average_theta_0 = p1.averager(loose_points, loose_labels)
p1.plot_2d_examples(
    loose_points, loose_labels,
    average_theta_0, average_theta,
    'Averager - loose points',
)

perceptron_theta, perceptron_theta_0 = p1.train_perceptron(
    loose_points, loose_labels)
p1.plot_2d_examples(
    loose_points, loose_labels,
    perceptron_theta_0, perceptron_theta,
    'Perceptron - loose points',
)

# The third argument (1000) is passed straight through to the trainer.
pa_theta, pa_theta_0 = p1.train_passive_agressive(
    loose_points, loose_labels, 1000)
p1.plot_2d_examples(
    loose_points, loose_labels,
    pa_theta_0, pa_theta,
    'Passive Agressive - loose points',
)

# "Close" toy set: six 2-D points.
close_points = np.array([
    [-1, -1.25],
    [-1.5, -1],
    [1, 4],
    [1.5, 1.5],
    [4, 10],
    [-1, -1],
])
import project1_code as p1
import numpy as np

# Eight hand-picked 2-D sample points: four labelled +1, four labelled -1.
pos1 = [-0.3, 0.6]
pos2 = [0.2, 0.7]
pos3 = [1, 2]
pos4 = [-0.5, -0.1]

neg1 = [-0.1, -0.5]
neg2 = [0.3, -0.1]
neg3 = [0.6, -0.2]
neg4 = [1, 0.5]

# The same points twice: as a plain list of lists (handed to the plot helper)
# and as a NumPy array (handed to the trainers).
feature_matrix_list = [pos1, pos2, pos3, pos4, neg1, neg2, neg3, neg4]
feature_matrix = np.array(feature_matrix_list)
labels = [1] * 4 + [-1] * 4

# Start every trainer from an all-zero weight vector with one entry per
# feature column.
nsamples, nfeatures = feature_matrix.shape
initial_theta = np.zeros(nfeatures)

# Averager: the returned pair is unpacked as (theta_0, average_theta); the
# plot is drawn with a zero offset.
theta_0, average_theta = p1.averager(feature_matrix, labels)
p1.plot_2d_examples(feature_matrix_list, labels, 0, average_theta)

# Perceptron, started from initial_theta with a 0 third argument.
theta = p1.perceptron(feature_matrix, initial_theta, 0, labels)
p1.plot_2d_examples(feature_matrix_list, labels, 0, theta)

# Passive-aggressive, called with the same arguments.
# NOTE(review): the same initial_theta array object is reused here; if
# p1.perceptron updates it in place this run does not start from zeros --
# confirm against project1_code.
theta = p1.pa(feature_matrix, initial_theta, 0, labels)
p1.plot_2d_examples(feature_matrix_list, labels, 0, theta)
import numpy as np
import project1_code as p1

# Build the word dictionary from the training tweets, then load the reference
# labels and the feature vectors for both the training tweets and the
# tweepy sample.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)
feature_matrix_real = p1.extract_feature_vectors('sample_from_tweepy.txt', dictionary)

# --- Averager, classified WITHOUT the offset ------------------------------
# The averager result is treated as one vector whose last entry is the
# offset; that entry is split off and the rest is used as the weights.
average_without_offset_theta = p1.averager(feature_matrix, labels)
theta_0 = average_without_offset_theta[-1]
average_without_offset_theta = np.delete(average_without_offset_theta, -1)

# A literal 0 is passed as the offset here, not theta_0.
label_output = p1.perceptron_classify(feature_matrix, 0, average_without_offset_theta)

# Count predictions that agree with the reference labels.  enumerate() keeps
# the script runnable on both Python 2 and 3 (xrange exists only on 2).
correct = 0
for i, predicted in enumerate(label_output):
    if predicted == labels[i]:
        correct = correct + 1

# Report 0% rather than dividing by zero when nothing was classified.
percentage_correct = 100.0 * correct / len(label_output) if len(label_output) else 0.0
print("Averager without offset gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(len(label_output)) + ").")

# --- Averager, classified WITH the offset ---------------------------------
average_theta = p1.averager(feature_matrix, labels)
theta_0 = average_theta[-1]
average_theta = np.delete(average_theta, -1)
label_output = p1.perceptron_classify(feature_matrix, theta_0, average_theta)

# Reset the hit counter; presumably the with-offset predictions are scored next.
correct = 0
# Disabled four-point experiment:
# pos1 = [-0.3, 0.4]
# pos2 = [0.2, 0.3]
# neg1 = [-0.1, -0.1]
# neg2 = [0.3, 0.1]
# feature_matrix = [pos1, pos2, neg1, neg2]
# labels = [1, 1, -1, -1]
# p1.plot_2d_examples(feature_matrix, labels, 0, [0.25, 0.6])

# "Loose" data set: seven 2-D points; the first four carry label +1 and the
# last three carry label -1.
loose_points = np.array([
    [-3, 4],
    [-2, 3],
    [2, 4],
    [4, 2],
    [-3, -2],
    [0, -2],
    [3, -3],
])
loose_labels = np.array([1] * 4 + [-1] * 3)

# 1) Averager: result unpacked as (theta, theta_0), then plotted with the
#    offset passed ahead of the weight vector.
average_theta, average_theta_0 = p1.averager(loose_points, loose_labels)
p1.plot_2d_examples(
    loose_points, loose_labels,
    average_theta_0, average_theta,
    'Averager - loose points',
)

# 2) Perceptron on the same points.
perceptron_theta, perceptron_theta_0 = p1.train_perceptron(
    loose_points, loose_labels)
p1.plot_2d_examples(
    loose_points, loose_labels,
    perceptron_theta_0, perceptron_theta,
    'Perceptron - loose points',
)

# 3) Passive-aggressive; 1000 is handed to the trainer as its third argument.
pa_theta, pa_theta_0 = p1.train_passive_agressive(
    loose_points, loose_labels, 1000)
p1.plot_2d_examples(
    loose_points, loose_labels,
    pa_theta_0, pa_theta,
    'Passive Agressive - loose points',
)

# "Close" data set: six 2-D points.
close_points = np.array([
    [-1, -1.25],
    [-1.5, -1],
    [1, 4],
    [1.5, 1.5],
    [4, 10],
    [-1, -1],
])
# ---------------------------------------------------------------------------
# Disabled experiment: train on the tweet data, then classify the test tweets.
# ---------------------------------------------------------------------------
# training
# dictionary = p1.extract_dictionary('train-tweet.txt')
# training_labels = p1.read_vector_file('train-answer.txt')
# training_feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)
#
# ta, ta0 = p1.averager(training_feature_matrix, training_labels)
# tpc, tpc0 = p1.perceptron_algorithm(training_feature_matrix, training_labels)
# tps, tps0 = p1.passive_aggressive(training_feature_matrix, training_labels)
#
# testing
# testing_feature_matrix = p1.extract_feature_vectors('test-tweet.txt', dictionary)
#
# av_test_labels = p1.perceptron_classify(testing_feature_matrix, ta0, ta)
# pc_test_labels = p1.perceptron_classify(testing_feature_matrix, tpc0, tpc)
# ps_test_labels = p1.perceptron_classify(testing_feature_matrix, tps0, tps)
#
# plotting (was already disabled inside the disabled block)
# # p1.plot_2d_examples(testing_feature_matrix, av_test_labels, ta0, ta)
# # p1.plot_2d_examples(testing_feature_matrix, ps_test_labels, tps0, tps)
# # p1.plot_2d_examples(testing_feature_matrix, pc_test_labels, tpc0, tpc)

# ---------------------------------------------------------------------------
# Disabled alternative toy data sets.
# ---------------------------------------------------------------------------
# feature_matrix = np.array([[-3,2],[-1,1],[-1,-1],[2,2],[1,-1]])
# labels = ([1,1,-1,-1,-1])
# feature_matrix = np.array([[3,3],[3,4],[3,2],[3,5],[2,1],[-1,0],[1,1],[-1,1],
#                            [-1,-1],[4,1],[5,1],[4,1],[1,-2],[4,0],[3,0],[3,-1]])
# labels = ([1,1,1,1,1,1,1,1,-1,-1,-1,-1,-1,-1,-1,-1])

# ---------------------------------------------------------------------------
# Active toy data set: sixteen 2-D points, eight labelled +1 followed by
# eight labelled -1.
# ---------------------------------------------------------------------------
feature_matrix = np.array([
    [0, 5], [0, 4], [0, 6], [1, 5],
    [1, 7], [-1, 5], [-1, 6], [-1, 7],
    [-1, -1], [-4, 1], [-5, 3], [-4, -3],
    [-1, -2], [-3, -4], [-2, -8], [0, -5],
])
labels = [1] * 8 + [-1] * 8

# Run the averager (result unpacked as theta, theta_0) and plot the points
# with the offset passed ahead of the weight vector.
theta, theta_0 = p1.averager(feature_matrix, labels)
p1.plot_2d_examples(feature_matrix, labels, theta_0, theta)
import numpy as np
import project1_code as p1

# Build the word dictionary from the training tweets, then load the reference
# labels and the feature vectors for the same tweets.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Train the averager and classify the training set with a zero offset.
average_theta = p1.averager(feature_matrix, labels)
label_output = p1.perceptron_classify(feature_matrix, 0, average_theta)

# Count predictions that agree with the reference labels.  enumerate() keeps
# the script runnable on both Python 2 and 3 (xrange exists only on 2).
correct = 0
for i, predicted in enumerate(label_output):
    if predicted == labels[i]:
        correct = correct + 1

# Report 0% rather than dividing by zero when nothing was classified.
percentage_correct = 100.0 * correct / len(label_output) if len(label_output) else 0.0
print("Averager gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(len(label_output)) + ").")

# Plot the training examples with the learned weights and a zero offset.
p1.plot_2d_examples(feature_matrix, labels, 0, average_theta)
import numpy as np
import project1_code as p1

# Load the training data: a dictionary built from the tweets, the reference
# labels, and one feature vector per tweet.
dictionary = p1.extract_dictionary('train-tweet.txt')
labels = p1.read_vector_file('train-answer.txt')
feature_matrix = p1.extract_feature_vectors('train-tweet.txt', dictionary)

# Fit the averager, then label the training set using a zero offset.
average_theta = p1.averager(feature_matrix, labels)
label_output = p1.perceptron_classify(feature_matrix, 0, average_theta)

# Tally the predictions that match the reference labels.  The loop avoids
# xrange, which does not exist on Python 3, and still runs on Python 2.
correct = 0
for i, predicted in enumerate(label_output):
    if predicted == labels[i]:
        correct = correct + 1

# An empty prediction list is reported as 0% instead of raising
# ZeroDivisionError.
percentage_correct = 100.0 * correct / len(label_output) if len(label_output) else 0.0
print("Averager gets " + str(percentage_correct) + "% correct ("
      + str(correct) + " out of " + str(len(label_output)) + ").")

# Draw the training examples together with the learned weights (offset 0).
p1.plot_2d_examples(feature_matrix, labels, 0, average_theta)