def problem5(T=10, L=0.2):
    """Train three linear classifiers on the toy data and plot each boundary.

    Args:
        T: number of training epochs passed to each algorithm.
        L: lambda (regularization) parameter for Pegasos only.

    Side effects: calls plot_toy_results (defined elsewhere in this file),
    which renders one plot per algorithm.
    """
    # The original also bound the pair to an unused `toy_data` alias; dropped.
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
    thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
    thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

    plot_toy_results('Perceptron', thetas_perceptron, toy_features, toy_labels)
    plot_toy_results('Average Perceptron', thetas_avg_perceptron, toy_features, toy_labels)
    plot_toy_results('Pegasos', thetas_pegasos, toy_features, toy_labels)
def test_algorithm_compare(self):
    #---------------------------------------------------------------------------
    # Problem 5
    #---------------------------------------------------------------------------
    """Compare perceptron, average perceptron and Pegasos on the toy data.

    Prints the learned (theta, theta_0) for each algorithm and plots its
    decision boundary. T/L are fixed locally; nothing is returned.
    """
    # Unused `toy_data` alias and trailing bare `return` removed.
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')
    T = 100
    L = 0.2

    thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
    thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
    thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

    def plot_toy_results(algo_name, thetas):
        # thetas is a (theta, theta_0) pair; report both, then plot.
        print('theta for', algo_name, 'is', ', '.join(map(str, list(thetas[0]))))
        print('theta_0 for', algo_name, 'is', str(thetas[1]))
        utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

    plot_toy_results('Perceptron', thetas_perceptron)
    plot_toy_results('Average Perceptron', thetas_avg_perceptron)
    plot_toy_results('Pegasos', thetas_pegasos)
# Build one bag-of-words vocabulary from the training texts and featurize
# every split against that same vocabulary.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T = 5   # training epochs
L = 10  # aggressiveness/regularization parameter for passive-aggressive

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    # Plot the toy points together with the boundary given by thetas.
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Average Passive-Aggressive', thetas_avg_pa)

#-------------------------------------------------------------------------------
# Section 2.9.b
# Featurize validation and test splits with the current dictionary.
val_final_features = p1.extract_final_features(val_texts, dictionary)
test_final_features = p1.extract_final_features(test_texts, dictionary)

# Optional hyperparameter sweep, kept disabled:
#data = (train_final_features, train_labels, val_final_features, val_labels)
#
## values of T and lambda to try
#Ts = [1, 5, 10, 15, 25, 50, 100]
#
#avg_pct_tune_results = utils.tune_avg_perceptron(Ts, *data)
#utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)

# Train-on-train / evaluate-on-validation accuracy at T=15.
print(p1.average_perceptron_accuracy(train_final_features,val_final_features,train_labels,val_labels,T=15))
# NOTE(review): this call trains on the *validation* split and scores on the
# test split — presumably a deliberate final-evaluation step; confirm.
print(p1.average_perceptron_accuracy(val_final_features,test_final_features,val_labels,test_labels,T=15))

## Improve 2
# Keep only the "elite" words: train a bow model, then prune the dictionary
# to the words whose weights pass the 0.25 threshold (semantics of the
# threshold live in p1.bag_of_elite_words).
theta = p1.average_perceptron(train_bow_features, train_labels, T=15)[0]
new_dictionary = p1.bag_of_elite_words(dictionary, theta, 0.25)

# Re-featurize all splits with the pruned vocabulary (rebinds the
# *_final_features names defined above).
train_final_features = p1.extract_final_features(train_texts, new_dictionary)
val_final_features = p1.extract_final_features(val_texts, new_dictionary)
test_final_features = p1.extract_final_features(test_texts, new_dictionary)

# Same optional sweep for the pruned features, also disabled:
#data = (train_final_features, train_labels, val_final_features, val_labels)
#
## values of T and lambda to try
#Ts = [1, 5, 10, 15, 25, 50, 100]
#
#avg_pct_tune_results = utils.tune_avg_perceptron(Ts, *data)
#utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)

# Validation accuracy of the pruned representation at T=100.
print(p1.average_perceptron_accuracy(train_final_features,val_final_features,train_labels,val_labels,T=100))
import project1 as p1
from project1 import perceptron, average_perceptron, pegasos
import utils
import numpy as np
import numpy.testing as npt
import re

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

# For each algorithm: train on the toy data, print the learned
# (theta, theta_0) pair, and plot the resulting decision boundary.
# After the loop, `thetas` holds the Pegasos result (last iteration),
# matching the original straight-line version.
for algo_name, train_fn in (
        ('Perceptron', lambda: perceptron(toy_features, toy_labels, T)),
        ('Average Perceptron', lambda: average_perceptron(toy_features, toy_labels, T)),
        ('Pegasos', lambda: pegasos(toy_features, toy_labels, T, L)),
):
    thetas = train_fn()
    print(thetas)
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)
# One shared bag-of-words vocabulary, built from the training texts only;
# all three splits are vectorized with it.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

T = 5   # epochs for every algorithm below
L = 10  # parameter for the passive-aggressive variant

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    # Render the toy data with the learned separator.
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Average Passive-Aggressive', thetas_avg_pa)

#-------------------------------------------------------------------------------
# Section 2.9.b
#-------------------------------------------------------------------------------
# T = 5
# Final accuracy of the average perceptron: train on the training split,
# evaluate on the held-out test split.
x = p1.average_perceptron_accuracy(train_bow_features, test_bow_features, train_labels, test_labels, T=25)
print(x)

#-------------------------------------------------------------------------------
# pass
#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------
# Section 2.11b
#
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# BUG FIX: best_theta was previously learned from the *test* split
# (test_bow_features / test_labels), i.e. the model was fit on the very data
# it is evaluated against. Learn it from the training split instead. Both
# splits were featurized with the same `dictionary`, so theta's length (and
# its alignment with wordlist below) is unchanged.
best_theta = p1.average_perceptron(train_bow_features, train_labels, T=25)[0]

# `dictionary` maps word -> column index; sorting (index, word) pairs recovers
# the words in the same order as best_theta's components.
wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:80])

#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------
# Section 3.12
#
# After constructing a final feature representation, use code similar to that in
# sections 2.9b and 2.10 to assess its performance on the validation set.
# You may use your best classifier from before as a baseline.
# When you are satisfied with your features, evaluate their accuracy on the test
# set using the same procedure as in section 2.11a.
#-------------------------------------------------------------------------------
#------------------------------------------------------------------------------- # dictionary = p1.bag_of_words(train_texts) # # train_final_features = p1.extract_final_features(train_texts, dictionary) # val_final_features = p1.extract_final_features(val_texts, dictionary) # test_final_features = p1.extract_final_features(test_texts, dictionary) #------------------------------------------------------------------------------- # #------------------------------------------------------------------------------- # Section 3.13 # # Modify the code below to extract your best features from the submission data # and then classify it using your most accurate classifier. #------------------------------------------------------------------------------- submit_texts = [sample['text'] for sample in utils.load_data('reviews_submit.tsv')] # 1. Extract your preferred features from the train and submit data dictionary = p1.bag_of_words(submit_texts) #print("dctionary, ", dictionary) train_final_features = p1.extract_final_features(train_texts, dictionary) #print (train_final_features) submit_final_features = p1.extract_final_features(submit_texts, dictionary) # 2. Train your most accurate classifier final_thetas = p1.average_perceptron(train_final_features, train_labels, T=25) # 3. Classify and write out the submit predictions. submit_predictions = p1.classify(submit_final_features, *final_thetas) utils.write_predictions('reviews_submit.tsv', submit_predictions) #-------------------------------------------------------------------------------