def test_classification_test_dataset(self):
    """Score Pegasos on the test split and print the extreme word features.

    Trains ``p1.pegasos`` on the training bag-of-words features with T=25
    and L=0.01, prints the accuracy of the resulting classifier on the test
    features, then prints the first and last ten entries of the ranking
    returned by ``utils.most_explanatory_word``.
    """
    # Hyperparameters the assignment asks to pick from validation accuracy.
    T = 25
    L = 0.01
    theta, theta_0 = p1.pegasos(
        feature_matrix=train_bow_features,
        labels=train_labels,
        T=T,
        L=L,
    )

    pred_labels = p1.classify(test_bow_features, theta, theta_0)
    accuracy = p1.accuracy(pred_labels, test_labels)
    print(f'Accuracy on test data : {accuracy}')

    # Only the weights (not the bias) are used for the word ranking.
    best_theta = theta

    # Words ordered by their index in the dictionary.
    index_word_pairs = sorted((idx, word) for word, idx in dictionary.items())
    wordlist = [word for _, word in index_word_pairs]

    sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
    print("Most Explanatory Word Features")
    print(sorted_word_features[:10])
    print("Least Explanatory Word Features")
    print(sorted_word_features[-10:])
def problem5(T=10, L=0.2):
    """Train the three linear classifiers on the toy data and plot each one.

    Args:
        T: passed as the third argument to ``p1.perceptron``,
            ``p1.average_perceptron`` and ``p1.pegasos``.
        L: passed as the fourth argument to ``p1.pegasos``.
    """
    # Only the unpacked pair is used; the previous chained assignment also
    # bound the loader's whole return value to an unused ``toy_data`` local.
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
    thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
    thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

    # All models are trained first, then reported in a fixed order.
    plot_toy_results('Perceptron', thetas_perceptron,
                     toy_features, toy_labels)
    plot_toy_results('Average Perceptron', thetas_avg_perceptron,
                     toy_features, toy_labels)
    plot_toy_results('Pegasos', thetas_pegasos,
                     toy_features, toy_labels)
def test_algorithm_compare(self):
    """Problem 5: train the three classifiers on the toy data and plot them.

    Loads ``toy_data.tsv``, trains perceptron, average perceptron and Pegasos
    with T=100 (plus L=0.2 for Pegasos), then prints the learned parameters
    and plots the data for each algorithm.
    """
    # Only the unpacked pair is used; the previous chained assignment also
    # bound the loader's whole return value to an unused ``toy_data`` local.
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    T = 100
    L = 0.2

    thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
    thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
    thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

    def plot_toy_results(algo_name, thetas):
        """Print ``thetas[0]`` / ``thetas[1]`` and plot the toy data."""
        print('theta for', algo_name, 'is',
              ', '.join(map(str, list(thetas[0]))))
        print('theta_0 for', algo_name, 'is', str(thetas[1]))
        utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

    plot_toy_results('Perceptron', thetas_perceptron)
    plot_toy_results('Average Perceptron', thetas_avg_perceptron)
    plot_toy_results('Pegasos', thetas_pegasos)
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

# NOTE(review): the triple-quoted string below is a bare expression, so the
# test-set evaluation inside it never runs. The layout was reconstructed from
# a whitespace-mangled paste - confirm where the quotes open and close.
'''
#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
T = 25
L = 0.01
th, th_0 = p1.pegasos(train_bow_features, train_labels, T, L)
rezul = p1.classify(test_bow_features, th, th_0)
print('REZ = {:.4f}'.format(p1.accuracy(rezul, test_labels)))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
'''

# Live code: retrain Pegasos with the chosen hyperparameters and keep the
# weight vector only.
T = 25
L = 0.01
th, th_0 = p1.pegasos(train_bow_features, train_labels, T, L)
best_theta = th

# Words ordered by their index in the dictionary.
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
def train_fn(features, labels, T):
    """Run ``p1.pegasos`` for the given ``T`` with ``best_L`` held fixed."""
    # ``best_L`` is not a parameter; it is looked up in the enclosing scope.
    trained = p1.pegasos(features, labels, T, best_L)
    return trained
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------
T = 25
L = 0.0100

# The second data set handed to classifier_accuracy is the *test* split, so
# the second returned value is reported as a test accuracy. The variable
# keeps its historical "val" name in case later code refers to it.
avg_peg_train_accuracy, avg_peg_val_accuracy = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features, test_bow_features,
    train_labels, test_labels,
    T=T, L=L)
print("{:50} {:.4f}".format("Training accuracy for Pegasos:", avg_peg_train_accuracy))
print("{:50} {:.4f}".format("Test accuracy for Pegasos:", avg_peg_val_accuracy))

# classifier_accuracy only returns accuracies, so train once more to obtain
# the parameters themselves.
thetas_pegasos = p1.pegasos(train_bow_features, train_labels, T, L)
# print(thetas_pegasos)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
best_theta = thetas_pegasos[0]

# Words ordered by their index in the dictionary.
wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]

# Pass the ordered word list itself. Previously it was fed back through
# p1.bag_of_words, which yields a freshly built dictionary rather than the
# list that was just ordered to match the feature indices.
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])

#-------------------------------------------------------------------------------
# Bag-of-words feature matrices for the three data splits.
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 2000
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------

T = 10
# Bag-of-words feature matrices for the three data splits.
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    # Plotting is currently switched off:
    #utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------
#utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
#utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
#utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

L = 0.01
T = 25

# Disabled test-set evaluation, kept for reference:
#theta, theta_0 = p1.pegasos(train_bow_features, train_labels, T, L)
#labels = p1.classify(test_bow_features, theta, theta_0)
#kwargs={'T':T,'L': L}
#acc = p1.classifier_accuracy(p1.pegasos, test_bow_features, test_bow_features, labels, test_labels, **kwargs)
#print(acc)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta, theta_0 = p1.pegasos(train_bow_features, train_labels, T, L)

# Words ordered by their index in the dictionary.
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
# Bag-of-words feature matrices for the three data splits.
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------

T = 10
# -------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
# -------------------------------------------------------------------------------

# Your code here
# Reuse the L-tuning helper with a single candidate and the test split in the
# second data slot. NOTE(review): the 'valid' wording in the printed label
# comes from the tuning code; the figures printed here are for the test split.
data = (train_bow_features, train_labels, test_bow_features, test_labels)
fix_T = 25
Ls = [0.01]
peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data)
print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

# -------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
# -------------------------------------------------------------------------------

# Learn the weights from the *training* split. The previous live line fitted
# Pegasos on test_bow_features/test_labels, so the word ranking described a
# model trained on the held-out data. The hyperparameters are taken from
# fix_T / Ls so they cannot drift from the evaluation above.
best_theta = p1.pegasos(train_bow_features, train_labels, fix_T, Ls[0])[0]

# Words ordered by their index in the dictionary.
wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

peg_best_T = 25
peg_best_L = 0.01

# Train/test accuracies for Pegasos with the selected hyperparameters.
peg_train_accuracy, peg_test_accuracy = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=peg_best_T,
    L=peg_best_L,
)
print(peg_test_accuracy)

# Train again to get hold of the parameters themselves.
peg_theta, peg_theta_0 = p1.pegasos(
    train_bow_features, train_labels, peg_best_T, peg_best_L
)
print(peg_theta, peg_theta_0)
peg_test_preds = p1.classify(test_bow_features, peg_theta, peg_theta_0)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = peg_theta  # Your code here

# Words ordered by their index in the dictionary.
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
# Bag-of-words feature matrices for the validation and test splits.
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)
#print(train_bow_features)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------

T = 10
#
# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------
T = 25
L = 0.01

# Evaluate on the train/test "count" feature matrices (defined outside this
# section). The second returned value is the accuracy on the test split.
avg_peg_train_accuracy, avg_peg_test_accuracy = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_count_features, test_bow_count_features,
    train_labels, test_labels,
    T=T, L=L)
print("{:50} {:.4f}".format("Training accuracy for Pegasos:", avg_peg_train_accuracy))
# The label used to read "Validation accuracy", but the value printed is the
# test accuracy.
print("{:50} {:.4f}".format("Test accuracy for Pegasos:", avg_peg_test_accuracy))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
# classifier_accuracy only returns accuracies, so train once more to obtain
# the weight vector.
theta, theta_0 = p1.pegasos(train_bow_count_features, train_labels, T, L)
best_theta = theta  # Your code here

# Words ordered by their index in the dictionary.
wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Keyword arguments forwarded to Pegasos.
hyper_param = {'T': 25, 'L': 0.01}

accuracy_train, accuracy_test = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    **hyper_param
)
print("accuracy on test set= {:.4f} ".format(accuracy_test))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Element 0 of the Pegasos result is kept as the weight vector.
best_theta = p1.pegasos(
    train_bow_features, train_labels, hyper_param['T'], hyper_param['L']
)[0]

# Words ordered by their index in the dictionary.
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
print("Most (negative) Explanatory Word Features")
print(sorted_word_features[-10:])

#-------------------------------------------------------------------------------
# Removing stopwords and using the Pegasos algorithm with the optimised
# hyperparameters.
#-------------------------------------------------------------------------------
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
T = 25
L = 0.01

# The test split is passed in the second data slot, so the second returned
# value is the accuracy on the test set.
test_peg_train_accuracy, test_peg_val_accuracy = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=T,
    L=L,
)
print(test_peg_val_accuracy)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta, _ = p1.pegasos(train_bow_features, train_labels, T, L)  # Your code here

# Words ordered by their index in the dictionary.
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
# Bag-of-words feature matrices for the three data splits.
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 200
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


# The reporting calls are currently disabled.
#plot_toy_results('Perceptron', thetas_perceptron)
#plot_toy_results('Average Perceptron', thetas_avg_perceptron)
#plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
L_best = 0.01
T_best = 25

# Train on the training split, then score the predictions on the test split.
theta, theta_0 = p1.pegasos(train_bow_features, train_labels, L=L_best, T=T_best)
labels_hat = p1.classify(test_bow_features, theta, theta_0)
print(p1.accuracy(labels_hat, test_labels))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
best_theta = theta

# Words ordered by their index in the dictionary.
wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
# Show the head of the ranking so the output matches its label; the tail
# slice ([-10:]) was printed under this heading before.
# NOTE(review): assumes most_explanatory_word returns the most explanatory
# words first - confirm against utils.
print(sorted_word_features[:10])
# Bag-of-words feature matrices for the three data splits.
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

# -----------------------------------------------------------------------------
# Problem 5
# -----------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


# The reporting calls are currently disabled.
#plot_toy_results('Perceptron', thetas_perceptron)
#plot_toy_results('Average Perceptron', thetas_avg_perceptron)
#plot_toy_results('Pegasos', thetas_pegasos)

## Convergence check
#for t in range(2, 20000, 1000):
# Build the dictionary from the training texts, then the bag-of-words feature
# matrices for the three data splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 500
L = 0.2

# Find the theta and theta_0 for the corresponding algorithms.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

# Values used by the code that follows.
T = 10
L = 0.01
#
# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
# Parameters learned on the training split with T=25, L=0.01.
theta, theta_0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)

# Whatever classifier_accuracy returns for the train/test pair is printed
# as-is.
acc = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=25,
    L=0.01,
)
print(acc)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = theta  # Your code here
# Bag-of-words feature matrices for the three data splits.
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------
# Bag-of-words feature matrices for the three data splits.
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

# NOTE(review): the lone '#' markers in the mangled original are read as
# separator lines, not as commenting-out the statement after them, because
# plot_toy_results is called below and so must be defined - confirm.
#
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


#
def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


#
plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Report the validation results of the L sweep and plot every tuning curve.
print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset.
#-------------------------------------------------------------------------------

# Train on the training split with T=25, L=0.01.
(test_theta, test_theta_0) = p1.pegasos(train_bow_features, train_labels, 25, 0.01)

# Size of the test feature matrix along axis 0. Not used in this section;
# kept because it is a module-level name that later code may read.
n = np.size(test_bow_features, 0)

# Predictions are made on, and scored against, the *test* split. The
# variable keeps its historical name for compatibility, but the printed label
# now says what the number is (it used to read "Train accuracy is: ").
z = p1.classify(test_bow_features, test_theta, test_theta_0)
train_error = p1.accuracy(z, test_labels)
print("Test accuracy is: ", train_error)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
best_theta = test_theta

# Words ordered by their index in the dictionary.
wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
# Bag-of-words feature matrices for the three data splits.
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    """Print ``thetas[0]`` and ``thetas[1]`` for ``algo_name``, then plot."""
    rendered_theta = ', '.join(str(value) for value in thetas[0])
    print('theta for', algo_name, 'is', rendered_theta)
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Pegasos', thetas_pegasos)

#-------------------------------------------------------------------------------
# Problem 7
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
# Whatever classifier_accuracy returns for the train/test pair is printed
# as-is.
test_acc = p1.classifier_accuracy(
    p1.pegasos,
    train_bow_features,
    test_bow_features,
    train_labels,
    test_labels,
    T=25,
    L=0.0100,
)
print(test_acc)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Element 0 of the Pegasos result is kept as the weight vector.
best_theta = p1.pegasos(train_bow_features, train_labels, T=25, L=0.0100)[0]

# Words ordered by their index in the dictionary.
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
def train_fn(features, labels, L):
    """Run ``p1.pegasos`` for the given ``L`` with ``best_T`` held fixed."""
    # ``best_T`` is not a parameter; it is looked up in the enclosing scope.
    trained = p1.pegasos(features, labels, best_T, L)
    return trained
# Disabled tuning report and plots, kept for reference:
# peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data)
# print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
# print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Train on the training split (T=25, L=0.01), classify the test split and
# print the resulting accuracy.
t, t0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)
y_pred = p1.classify(test_bow_features, t, t0)
print(p1.accuracy(y_pred, test_labels))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# Disabled word-ranking report:
# best_theta = t
# wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
# sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
# print("Most Explanatory Word Features")
# print(sorted_word_features[:10])
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Print whatever classifier_accuracy returns for the train/test pair.
print(
    p1.classifier_accuracy(
        p1.pegasos,
        train_bow_features,
        test_bow_features,
        train_labels,
        test_labels,
        T=25,
        L=0.01,
    )
)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta, best_theta_0 = p1.pegasos(
    train_bow_features, train_labels, T=25, L=0.01
)

# Words ordered by their index in the dictionary.
wordlist = [
    word for _, word in sorted((idx, word) for word, idx in dictionary.items())
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
import project1 as p1
from project1 import perceptron, average_perceptron, pegasos
import utils
import numpy as np
import numpy.testing as npt
import re

# Load the toy data set; ``toy_data`` keeps the loader's whole return value.
toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

# Train each algorithm in turn, printing and plotting its result before
# moving on. ``thetas`` is rebound at every step.
thetas = perceptron(toy_features, toy_labels, T)
print(thetas)
utils.plot_toy_data("Perceptron", toy_features, toy_labels, thetas)

thetas = average_perceptron(toy_features, toy_labels, T)
print(thetas)
utils.plot_toy_data("Average Perceptron", toy_features, toy_labels, thetas)

thetas = pegasos(toy_features, toy_labels, T, L)
print(thetas)
utils.plot_toy_data("Pegasos", toy_features, toy_labels, thetas)