Example #1
def tune(train_fn, param_vals, train_feats, train_labels, val_feats,
         val_labels):
    # For each hyperparameter value, train on the training split and record
    # accuracy on both the train and validation splits.
    train_accs = np.empty(len(param_vals))
    val_accs = np.empty(len(param_vals))
    for i, val in enumerate(param_vals):
        theta, theta_0 = train_fn(train_feats, train_labels, val)

        train_preds = p1.classify(train_feats, theta, theta_0)
        train_accs[i] = p1.accuracy(train_preds, train_labels)

        val_preds = p1.classify(val_feats, theta, theta_0)
        val_accs[i] = p1.accuracy(val_preds, val_labels)
    return train_accs, val_accs
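# A minimal usage sketch for the tune() helper above. Assumptions: the
# bag-of-words splits (train_bow_features, val_bow_features, ...) are loaded
# as in the later examples, and p1.perceptron takes (feature_matrix, labels, T)
# so it matches the train_fn signature.
Ts = [1, 5, 10, 15, 25, 50]
pct_tune_results = tune(p1.perceptron, Ts,
                        train_bow_features, train_labels,
                        val_bow_features, val_labels)
print('best T = {}'.format(Ts[np.argmax(pct_tune_results[1])]))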
Example #2
def tune(train_fn, param_vals, train_feats, train_labels, val_feats, val_labels):
    train_accs = np.empty(len(param_vals))
    val_accs = np.empty(len(param_vals))

    for i, val in enumerate(param_vals):
        theta, theta_0 = train_fn(train_feats, train_labels, val)

        train_preds = p1.classify(train_feats, theta, theta_0)
        train_accs[i] = p1.accuracy(train_preds, train_labels)

        val_preds = p1.classify(val_feats, theta, theta_0)
        val_accs[i] = p1.accuracy(val_preds, val_labels)

    return train_accs, val_accs
Example #3
    def test_classification_test_dataset(self):
        # -------------------------------------------------------------------------------
        # Use the best method (perceptron, average perceptron or Pegasos) along with
        # the optimal hyperparameters according to validation accuracies to test
        # against the test dataset. The test data has been provided as
        # test_bow_features and test_labels.
        # -------------------------------------------------------------------------------
        T = 25
        L = 0.01
        theta, theta_0 = p1.pegasos(feature_matrix=train_bow_features, labels=train_labels, T=T, L=L)
        pred_labels = p1.classify(test_bow_features, theta, theta_0)
        accuracy = p1.accuracy(pred_labels, test_labels)
        print(f'Accuracy on test data: {accuracy}')
        # -------------------------------------------------------------------------------
        # Assign to best_theta, the weights (and not the bias!) learned by your most
        # accurate algorithm with the optimal choice of hyperparameters.
        # -------------------------------------------------------------------------------

        best_theta = theta
        wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
        sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
        print("Most Explanatory Word Features")
        print(sorted_word_features[:10])
        print("Least Explanatory Word Features")
        print(sorted_word_features[-10:])
        return
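# Toy illustration (hypothetical three-word dictionary) of the wordlist
# inversion used above: bag_of_words maps word -> column index, so sorting
# (index, word) pairs recovers the column-ordered vocabulary that
# most_explanatory_word expects.
toy_dictionary = {'good': 0, 'bad': 1, 'plot': 2}
toy_wordlist = [word for (idx, word) in
                sorted(zip(toy_dictionary.values(), toy_dictionary.keys()))]
assert toy_wordlist == ['good', 'bad', 'plot']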
Example #4
# peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data)
# print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
# print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

t, t0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)
y_pred = p1.classify(test_bow_features, t, t0)
print(p1.accuracy(y_pred, test_labels))
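# Rough sketch of the update that p1.pegasos presumably performs (an
# assumption for illustration, not the graded implementation): T passes over
# the data, step size 1/sqrt(t), L2 regularization with parameter L.
import numpy as np

def pegasos_sketch(feature_matrix, labels, T, L):
    n, d = feature_matrix.shape
    theta, theta_0, t = np.zeros(d), 0.0, 0
    for _ in range(T):
        for i in np.random.permutation(n):
            t += 1
            eta = 1.0 / np.sqrt(t)
            if labels[i] * (feature_matrix[i] @ theta + theta_0) <= 1:
                theta = (1 - eta * L) * theta + eta * labels[i] * feature_matrix[i]
                theta_0 = theta_0 + eta * labels[i]
            else:
                theta = (1 - eta * L) * theta
    return theta, theta_0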

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# best_theta = t
# wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
# sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
# print("Most Explanatory Word Features")
# print(sorted_word_features[:10])
Example #5
# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
theta, theta_0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)
labels = p1.classify(test_bow_features, theta, theta_0)
print(p1.accuracy(labels, test_labels))
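# Minimal sketch of the accuracy helper used throughout (an assumption: it
# is simply the fraction of predictions that match the targets).
def accuracy_sketch(preds, targets):
    return (preds == targets).mean()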

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = theta  # Your code here
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
Example #6
# print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
# print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

theta, theta_0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)
test_classified = p1.classify(test_bow_features, theta, theta_0)
acc = p1.accuracy(test_classified, test_labels)
print('Accuracy: ', acc)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# best_theta = theta # Your code here
# wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
# sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
# print("Most Explanatory Word Features")
# print(sorted_word_features[:10])
Example #7
theta = p1.average_passive_aggressive(train_final_features, train_labels, T, L)[0]
best_theta = theta
wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)

print("Most Explanatory Word Features")
print(sorted_word_features[:15])

#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------
# Section 3.13
#
# Modify the code below to extract your best features from the submission data
# and then classify it using your most accurate classifier.
#-------------------------------------------------------------------------------
submit_texts = [sample['text'] for sample in utils.load_data('reviews_submit.tsv')]

# 1. Extract your preferred features from the train and submit data
dictionary = p1.bag_of_words(train_texts)
train_final_features = p1.extract_final_features(train_texts, dictionary)
submit_final_features = p1.extract_final_features(submit_texts, dictionary)

# 2. Train your most accurate classifier
final_thetas = p1.average_passive_aggressive(train_final_features, train_labels, T=4, L=75)

# 3. Classify and write out the submit predictions.
submit_predictions = p1.classify(submit_final_features, *final_thetas)
utils.write_predictions('reviews_submit.tsv', submit_predictions)
#-------------------------------------------------------------------------------
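# Rough sketch of the step-1 dictionary builder used above (an assumption
# about p1.bag_of_words, for illustration only): each previously unseen
# token gets the next free column index.
def bag_of_words_sketch(texts):
    dictionary = {}
    for text in texts:
        for word in text.split():
            if word not in dictionary:
                dictionary[word] = len(dictionary)
    return dictionary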
Example #8
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

print("")
print("#######")
print("pegasos accuracy on test set")
print("#######")

thetas = p1.pegasos(train_bow_features, train_labels, T=25, L=0.01)
test_classified = p1.classify(test_bow_features, thetas[0], thetas[1])
print(p1.accuracy(test_classified, test_labels))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = thetas[0]
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
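# Plausible sketch of utils.most_explanatory_word (an assumption, for
# illustration): order the vocabulary by descending weight in theta, so the
# head of the returned list carries the strongest positive evidence.
def most_explanatory_word_sketch(theta, wordlist):
    return [word for (weight, word) in
            sorted(zip(theta, wordlist), reverse=True)]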
Example #9
utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

theta, theta_0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)

pred_test = p1.classify(test_bow_features, theta, theta_0)

test_acc = p1.accuracy(pred_test, test_labels)

print("{:50} {:.4f}".format("Test accuracy for Pegasos:", test_acc))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = theta
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
Example #10
fix_T = Ts[np.argmax(peg_tune_results_T[1])]
peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data)
print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]),
                                       Ls[np.argmax(peg_tune_results_L[1])]))
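# Plausible sketch of the utils.tune_pegasos_L helper used above (an
# assumption, not the actual utils source): fix T, sweep the regularization
# parameter L, and return (train_accs, val_accs) so that index [1] is the
# validation curve inspected above.
def tune_pegasos_L_sketch(best_T, Ls, train_feats, train_labels,
                          val_feats, val_labels):
    train_accs = np.empty(len(Ls))
    val_accs = np.empty(len(Ls))
    for i, L in enumerate(Ls):
        theta, theta_0 = p1.pegasos(train_feats, train_labels, best_T, L)
        train_accs[i] = p1.accuracy(
            p1.classify(train_feats, theta, theta_0), train_labels)
        val_accs[i] = p1.accuracy(
            p1.classify(val_feats, theta, theta_0), val_labels)
    return train_accs, val_accs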

utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test data.
optimal_T = 25
optimal_L = 0.0100  # Pegasos regularization parameter, not a learning rate
best_theta, best_theta_0 = p1.pegasos(train_bow_features, train_labels,
                                      optimal_T, optimal_L)
best_one = p1.classify(test_bow_features, best_theta, best_theta_0)
best_one_accuracy = p1.accuracy(best_one, test_labels)
print(best_one_accuracy)

# Assign to best_theta the weights learned by the most accurate algorithm
# with the optimal choice of hyperparameters.
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
Example #11
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

#learn theta and theta_0 from the training data
theta, theta_0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)

#create a prediction vector from the test data
prediction_vector = p1.classify(test_bow_features, theta, theta_0)

#calculate the accuracy on the test data
test_accuracy = p1.accuracy(prediction_vector, test_labels)

print("Accuracy on the test set: {:.3f}".format(test_accuracy))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = theta
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
Example #12
print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

theta, theta_0 = p1.pegasos(train_bow_features, train_labels, T=25, L=0.01)
test_preds = p1.classify(test_bow_features, theta, theta_0)
test_accu = p1.accuracy(test_preds, test_labels)
print(test_accu)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = theta
wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])

Example #13
File: main.py Project: wesenu/6.036
#-------------------------------------------------------------------------------
# dictionary = p1.bag_of_words(train_texts)
#
# train_final_features = p1.extract_final_features(train_texts, dictionary)
# val_final_features   = p1.extract_final_features(val_texts, dictionary)
# test_final_features  = p1.extract_final_features(test_texts, dictionary)
#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------
# Section 3.13
#
# Modify the code below to extract your best features from the submission data
# and then classify it using your most accurate classifier.
#-------------------------------------------------------------------------------
submit_texts = [sample['text'] for sample in utils.load_data('reviews_submit.tsv')]
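# Plausible sketch of utils.load_data (an assumption, for illustration):
# each tab-separated row becomes a dict keyed by the header, so samples
# expose fields such as 'text' and 'sentiment'.
import csv

def load_data_sketch(path):
    with open(path) as f:
        return list(csv.DictReader(f, delimiter='\t'))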

# 1. Extract your preferred features from the train and submit data
dictionary = p1.bag_of_words(submit_texts)
# print("dictionary:", dictionary)
train_final_features = p1.extract_final_features(train_texts, dictionary)
# print(train_final_features)
submit_final_features = p1.extract_final_features(submit_texts, dictionary)

# 2. Train your most accurate classifier
final_thetas = p1.average_perceptron(train_final_features, train_labels, T=25)

# 3. Classify and write out the submit predictions.
submit_predictions = p1.classify(submit_final_features, *final_thetas)
utils.write_predictions('reviews_submit.tsv', submit_predictions)
#-------------------------------------------------------------------------------
Example #14
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

peg_best_T = 25
peg_best_L = 0.01
peg_train_accuracy, peg_test_accuracy = p1.classifier_accuracy(p1.pegasos,
                                                               train_bow_features,
                                                               test_bow_features,
                                                               train_labels,
                                                               test_labels,
                                                               T=peg_best_T,
                                                               L=peg_best_L)

print(peg_test_accuracy)
peg_theta, peg_theta_0 = p1.pegasos(train_bow_features, train_labels,
                                    peg_best_T, peg_best_L)
print(peg_theta, peg_theta_0)
peg_test_preds = p1.classify(test_bow_features, peg_theta, peg_theta_0)
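# Plausible sketch of the p1.classifier_accuracy wrapper called above (an
# assumption, not the graded code): train once with the given
# hyperparameters, then score both the training and test splits.
def classifier_accuracy_sketch(train_fn, train_feats, test_feats,
                               train_labels, test_labels, **kwargs):
    theta, theta_0 = train_fn(train_feats, train_labels, **kwargs)
    train_acc = p1.accuracy(
        p1.classify(train_feats, theta, theta_0), train_labels)
    test_acc = p1.accuracy(
        p1.classify(test_feats, theta, theta_0), test_labels)
    return train_acc, test_acc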

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = peg_theta  # Your code here
wordlist = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
Example #15
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
T = 25
L = 0.0100
theta, theta_0 = p1.pegasos(train_bow_features, train_labels, T, L)
train_accuracy = p1.accuracy(p1.classify(train_bow_features, theta, theta_0), train_labels)
val_accuracy = p1.accuracy(p1.classify(val_bow_features, theta, theta_0), val_labels)
test_accuracy = p1.accuracy(p1.classify(test_bow_features, theta, theta_0), test_labels)

print(train_accuracy, val_accuracy, test_accuracy)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
# T = 25
# L = 0.0100
# best_theta = p1.pegasos(train_bow_features,train_labels,T,L)[0] # Your code here
# wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
# sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
# print("Most Explanatory Word Features")
Example #16
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here

T = 25
L = 0.01

theta, theta_0 = p1.pegasos(train_bow_features, train_labels, T, L)

predictions = p1.classify(test_bow_features, theta, theta_0)
test_accuracy = p1.accuracy(predictions, test_labels)
print(f"test_accuracy: {test_accuracy}")

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = theta # Your code here
wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
Example #17
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

#Your code here

T = 25
L = 0.0100
Q_theta, Q_theta_0 = p1.pegasos(train_bow_features, train_labels, T, L)
Q_pred = p1.classify(test_bow_features, Q_theta, Q_theta_0)
Q_accuracy = p1.accuracy(Q_pred, test_labels)
print(Q_accuracy)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = Q_theta # Your code here
wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
print(sorted_word_features[-10:])
Example #18
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

print("")
# Your code here
T = 25
L = 0.01
b_theta, b_theta_0 = p1.pegasos(train_bow_features, train_labels, T, L)
result_labels = p1.classify(test_bow_features, b_theta, b_theta_0)
acc = p1.accuracy(result_labels, test_labels)
print(b_theta)
print(b_theta_0)
print(acc)

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = b_theta  # Your code here
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
Example #19
utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)
'''

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

best_theta, best_theta_0 = p1.pegasos(train_bow_features, train_labels, 25,
                                      0.01)
best_accuracy = p1.accuracy(
    p1.classify(test_bow_features, best_theta, best_theta_0), test_labels)
print('Accuracy on the test set: {:.4f}'.format(best_accuracy))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

# best_theta already holds the Pegasos weights learned above (not the bias).
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)

print("Most Explanatory Word Features")
print(sorted_word_features[:10])
Example #20
def testclass():
    # Smoke test for classify: every point sits at theta . x + theta_0 = 2,
    # which is positive, so every prediction should be +1.
    feature_matrix = np.array([[1, 1], [1, 1], [1, 1]])
    theta = np.array([1, 1])
    theta_0 = 0
    assert (p1.classify(feature_matrix, theta, theta_0) == 1).all()
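# Minimal sketch of the classify contract that the test above relies on (an
# assumption about p1.classify): predict +1 where theta . x + theta_0 > 0
# and -1 otherwise.
import numpy as np

def classify_sketch(feature_matrix, theta, theta_0):
    return np.where(feature_matrix @ theta + theta_0 > 0, 1, -1)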
Example #21
import numpy as np

#-------------------------------------------------------------------------------
# Data loading. There is no need to edit code in this section.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')
with open('stopwords.txt') as stopwords_file:
    stopwords_data = stopwords_file.read().split()

train_texts, train_labels = zip(*((sample['text'], sample['sentiment']) for sample in train_data))

dictionary = p1.bag_of_words(train_texts, stopwords_data)
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
T = 25
L = 0.01
thetas_pegasos = p1.pegasos(train_bow_features, train_labels, T, L)

while True:
    input_texts = input('Input your review (or "quit" to stop): ')
    if input_texts == 'quit':
        break
    # Pad with a dummy first entry so the extractor always receives a list
    # of texts; only the last prediction is inspected.
    input_bow_features = p1.extract_bow_feature_vectors(['blah', input_texts], dictionary)
    output = p1.classify(input_bow_features, thetas_pegasos[0], thetas_pegasos[1])
    print('_______________________________________________________________')
    if output[-1] == 1:
        print('This is a positive review!')
    else:
        print('This is a negative review.')
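# Rough sketch of the binary bag-of-words extraction used above (an
# assumption about p1.extract_bow_feature_vectors): one row per text, one
# column per dictionary word, 1 if the word occurs in the text.
import numpy as np

def extract_bow_features_sketch(texts, dictionary):
    features = np.zeros((len(texts), len(dictionary)))
    for i, text in enumerate(texts):
        for word in text.split():
            if word in dictionary:
                features[i, dictionary[word]] = 1
    return features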
Example #22
utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset.
#-------------------------------------------------------------------------------

(test_theta, test_theta_0) = p1.pegasos(train_bow_features, train_labels, 25,
                                        0.01)
test_preds = p1.classify(test_bow_features, test_theta, test_theta_0)
test_accuracy = p1.accuracy(test_preds, test_labels)
print("Test accuracy is: ", test_accuracy)
# #-------------------------------------------------------------------------------
# # Assign to best_theta, the weights (and not the bias!) learned by your most
# # accurate algorithm with the optimal choice of hyperparameters.
# #-------------------------------------------------------------------------------

best_theta = test_theta
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
Example #23
# peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data)
# print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
# print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here
theta, theta_0 = p1.perceptron(train_bow_features, train_labels, 50)
test_preds = p1.classify(test_bow_features, theta, theta_0)
print(p1.accuracy(test_preds, test_labels))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta = theta # Your code here
wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
Example #24
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here

data = (train_bow_features, train_labels, test_bow_features, test_labels)

theta, theta_0 = p1.pegasos(train_bow_features, train_labels, 25, 0.01)
# values of T and lambda to try
# Ts = [25]
# fix_L = 0.01


preds = p1.classify(test_bow_features, theta, theta_0)
print(p1.accuracy(preds, test_labels))
print(theta)
# peg_tune_results_T = utils.tune_pegasos_T(fix_L, Ts, *data)
# print('Pegasos valid: tune T', list(zip(Ts, peg_tune_results_T[1])))
# print('best = {:.4f}, T={:.4f}'.format(
#     np.max(peg_tune_results_T[1]), Ts[np.argmax(peg_tune_results_T[1])]))




#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
Example #25
utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)
'''

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here

T = 25
L = 0.01
th, th_0 = p1.pegasos(train_bow_features, train_labels, T, L)
preds = p1.classify(test_bow_features, th, th_0)
print('Test accuracy = {:.4f}'.format(p1.accuracy(preds, test_labels)))

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------
T = 25
L = 0.01
th, th_0 = p1.pegasos(train_bow_features , train_labels, T, L)
best_theta = th
wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])
Example #26
# print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))

# utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
# utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
# utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
# utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)

#-------------------------------------------------------------------------------
# Use the best method (perceptron, average perceptron or Pegasos) along with
# the optimal hyperparameters according to validation accuracies to test
# against the test dataset. The test data has been provided as
# test_bow_features and test_labels.
#-------------------------------------------------------------------------------

# Your code here

#-------------------------------------------------------------------------------
# Assign to best_theta, the weights (and not the bias!) learned by your most
# accurate algorithm with the optimal choice of hyperparameters.
#-------------------------------------------------------------------------------

best_theta, best_theta_0 = p1.pegasos(train_bow_features, train_labels, 25, 0.0100)
print("Accuracy on test",
      p1.accuracy(p1.classify(test_bow_features, best_theta, best_theta_0),
                  test_labels))
wordlist = [
    word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))
]
sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
print("Most Explanatory Word Features")
print(sorted_word_features[:10])